Skip to content

Commit

Permalink
Merge pull request #1610 from fishtown-analytics/feature/split-parsed…
Browse files Browse the repository at this point in the history
…-things

Split Parsed and Compiled nodes into subtypes (#1601)
  • Loading branch information
beckjake authored Jul 18, 2019
2 parents 72afd76 + b9a3fe5 commit 62c3318
Show file tree
Hide file tree
Showing 22 changed files with 445 additions and 244 deletions.
19 changes: 11 additions & 8 deletions core/dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import InjectedCTE, CompiledNode, \
CompiledTestNode
from dbt.contracts.graph.compiled import InjectedCTE, COMPILED_TYPES
from dbt.contracts.graph.parsed import ParsedNode

from dbt.logger import GLOBAL_LOGGER as logger

graph_file_name = 'graph.gpickle'


def _compiled_type_for(model):
if model.resource_type == NodeType.Test:
return CompiledTestNode
else:
return CompiledNode
def _compiled_type_for(model: ParsedNode):
    """Look up the compiled class for ``model``'s resource type.

    Returns the class stored in ``COMPILED_TYPES`` under
    ``model.resource_type``.

    Raises ``dbt.exceptions.InternalException`` when ``model.resource_type``
    is not a key of ``COMPILED_TYPES``.
    """
    resource_type = model.resource_type
    if resource_type in COMPILED_TYPES:
        return COMPILED_TYPES[resource_type]

    message = 'Asked to compile {} node, but it has no compiled form'.format(
        resource_type
    )
    raise dbt.exceptions.InternalException(message)


def print_compile_stats(stats):
Expand Down Expand Up @@ -75,7 +77,8 @@ def recursively_prepend_ctes(model, manifest):
return (model, model.extra_ctes, manifest)

if dbt.flags.STRICT_MODE:
assert isinstance(model, (CompiledNode, CompiledTestNode))
assert isinstance(model, tuple(COMPILED_TYPES.values())), \
'Bad model type: {}'.format(type(model))

prepended_ctes = []

Expand Down
97 changes: 84 additions & 13 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,27 @@
from dbt.contracts.graph.parsed import (
ParsedNodeMixins, ParsedNode, ParsedSourceDefinition,
ParsedNodeDefaults, TestType, ParsedTestNode, TestConfig
ParsedNode,
ParsedAnalysisNode,
ParsedDocumentation,
ParsedMacro,
ParsedModelNode,
ParsedHookNode,
ParsedRPCNode,
ParsedSeedNode,
ParsedSnapshotNode,
ParsedSourceDefinition,
ParsedTestNode,
TestConfig,
)
from dbt.node_types import (
NodeType,
AnalysisType,
ModelType,
OperationType,
RPCCallType,
SeedType,
SnapshotType,
TestType,
)

from dbt.contracts.util import Replaceable

from hologram import JsonSchemaMixin
Expand All @@ -23,7 +42,7 @@ class InjectedCTE(JsonSchemaMixin, Replaceable):


@dataclass
class CompiledNodeDefaults(ParsedNodeDefaults, ParsedNodeMixins):
class CompiledNode(ParsedNode):
compiled: bool = False
compiled_sql: Optional[str] = None
extra_ctes_injected: bool = False
Expand Down Expand Up @@ -53,18 +72,38 @@ def set_cte(self, cte_id: str, sql: str):


@dataclass
class CompiledNode(CompiledNodeDefaults):
class CompiledAnalysisNode(CompiledNode):
resource_type: AnalysisType


@dataclass
class CompiledHookNode(CompiledNode):
resource_type: OperationType
index: Optional[int] = None

@classmethod
def from_parsed_node(cls, parsed, **kwargs):
dct = parsed.to_dict()
dct.update(kwargs)
return cls.from_dict(dct)

@dataclass
class CompiledModelNode(CompiledNode):
resource_type: ModelType


@dataclass
class CompiledRPCNode(CompiledNode):
resource_type: RPCCallType


@dataclass
class CompiledSeedNode(CompiledNode):
resource_type: SeedType


@dataclass
class CompiledSnapshotNode(CompiledNode):
resource_type: SnapshotType


@dataclass
class CompiledTestNode(CompiledNodeDefaults):
class CompiledTestNode(CompiledNode):
resource_type: TestType
column_name: Optional[str] = None
config: TestConfig = field(default_factory=TestConfig)
Expand Down Expand Up @@ -130,11 +169,43 @@ def _inject_ctes_into_sql(sql: str, ctes: List[InjectedCTE]) -> str:
return str(parsed)


# Registry from NodeType to the compiled node class for that resource type.
# compiled_type_for() below looks classes up here by a node's resource_type.
COMPILED_TYPES = {
NodeType.Analysis: CompiledAnalysisNode,
NodeType.Model: CompiledModelNode,
NodeType.Operation: CompiledHookNode,
NodeType.RPCCall: CompiledRPCNode,
NodeType.Seed: CompiledSeedNode,
NodeType.Snapshot: CompiledSnapshotNode,
NodeType.Test: CompiledTestNode,
}


def compiled_type_for(parsed: ParsedNode):
    """Return the compiled class for ``parsed``, or its own class as fallback.

    The class registered in ``COMPILED_TYPES`` under ``parsed.resource_type``
    is returned when there is one; otherwise ``type(parsed)`` is returned.
    """
    return COMPILED_TYPES.get(parsed.resource_type, type(parsed))


# We allow either parsed or compiled nodes, or parsed sources, as some
# 'compile()' calls in the runner actually just return the original parsed
# node they were given.
CompileResultNode = Union[
CompiledNode, ParsedNode,
CompiledTestNode, ParsedTestNode,
CompiledAnalysisNode,
CompiledModelNode,
CompiledHookNode,
CompiledRPCNode,
CompiledSeedNode,
CompiledSnapshotNode,
CompiledTestNode,
ParsedAnalysisNode,
ParsedDocumentation,
ParsedMacro,
ParsedModelNode,
ParsedHookNode,
ParsedRPCNode,
ParsedSeedNode,
ParsedSnapshotNode,
ParsedSourceDefinition,
ParsedTestNode,
]
47 changes: 43 additions & 4 deletions core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from dbt.contracts.util import Replaceable
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.node_types import (
NodeType, SourceType, SnapshotType, MacroType, TestType
NodeType, SourceType, SnapshotType, MacroType, TestType, OperationType,
SeedType, ModelType, AnalysisType, RPCCallType
)


Expand Down Expand Up @@ -190,20 +191,44 @@ class ParsedNodeDefaults(ParsedNodeMandatory):
build_path: Optional[str] = None


# TODO(jeb): hooks should get their own parsed type instead of including
# index everywhere!
@dataclass
class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins):
pass


@dataclass
class ParsedAnalysisNode(ParsedNode):
resource_type: AnalysisType


@dataclass
class ParsedHookNode(ParsedNode):
resource_type: OperationType
index: Optional[int] = None


@dataclass
class ParsedModelNode(ParsedNode):
resource_type: ModelType


@dataclass
class ParsedRPCNode(ParsedNode):
resource_type: RPCCallType


@dataclass
class ParsedSeedNode(ParsedNode):
resource_type: SeedType


@dataclass
class TestConfig(NodeConfig):
severity: Severity = 'error'


@dataclass
class ParsedTestNode(ParsedNodeDefaults, ParsedNodeMixins):
class ParsedTestNode(ParsedNode):
resource_type: TestType
column_name: Optional[str] = None
config: TestConfig = field(default_factory=TestConfig)
Expand Down Expand Up @@ -415,3 +440,17 @@ def tags(self):
@property
def has_freshness(self):
return bool(self.freshness) and self.loaded_at_field is not None


# Registry from NodeType to the parsed class declared for that resource type.
PARSED_TYPES = {
NodeType.Analysis: ParsedAnalysisNode,
NodeType.Documentation: ParsedDocumentation,
NodeType.Macro: ParsedMacro,
NodeType.Model: ParsedModelNode,
NodeType.Operation: ParsedHookNode,
NodeType.RPCCall: ParsedRPCNode,
NodeType.Seed: ParsedSeedNode,
NodeType.Snapshot: ParsedSnapshotNode,
NodeType.Source: ParsedSourceDefinition,
NodeType.Test: ParsedTestNode,
}
33 changes: 26 additions & 7 deletions core/dbt/node_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,44 @@ class UnparsedNodeType(StrEnum):
RPCCall = str(NodeType.RPCCall)


class RunHookType(StrEnum):
Start = 'on-run-start'
End = 'on-run-end'

# It would be nice to use hologram.StrLiteral for these, but it results in
# un-pickleable types :(


class AnalysisType(StrEnum):
Analysis = str(NodeType.Analysis)


class DocumentationType(StrEnum):
Documentation = str(NodeType.Documentation)


class RunHookType(StrEnum):
Start = 'on-run-start'
End = 'on-run-end'
class MacroType(StrEnum):
Macro = str(NodeType.Macro)


class ModelType(StrEnum):
Model = str(NodeType.Model)


class OperationType(StrEnum):
Operation = str(NodeType.Operation)


class SnapshotType(StrEnum):
Snapshot = str(NodeType.Snapshot)
class RPCCallType(StrEnum):
RPCCall = str(NodeType.RPCCall)


class MacroType(StrEnum):
Macro = str(NodeType.Macro)
class SeedType(StrEnum):
Seed = str(NodeType.Seed)


class SnapshotType(StrEnum):
Snapshot = str(NodeType.Snapshot)


class SourceType(StrEnum):
Expand Down
17 changes: 15 additions & 2 deletions core/dbt/parser/analysis.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
import os
from typing import Dict, Any

from dbt.contracts.graph.parsed import ParsedAnalysisNode, ParsedRPCNode
from dbt.parser.base_sql import BaseSqlParser
import os


class AnalysisParser(BaseSqlParser):
    """SQL parser whose parsed entities are ``ParsedAnalysisNode`` objects."""

    @classmethod
    def get_compiled_path(cls, name, relative_path):
        """Return ``relative_path`` placed under the ``analysis`` directory.

        ``name`` is not used.
        """
        compiled_path = os.path.join('analysis', relative_path)
        return compiled_path

    def parse_from_dict(
        self, parsed_dict: Dict[str, Any]
    ) -> ParsedAnalysisNode:
        """Build a ``ParsedAnalysisNode`` from its dictionary form."""
        analysis_node = ParsedAnalysisNode.from_dict(parsed_dict)
        return analysis_node

class RPCCallParser(AnalysisParser):

class RPCCallParser(BaseSqlParser):
    """SQL parser whose parsed entities are ``ParsedRPCNode`` objects."""

    # Declared as a classmethod, like AnalysisParser.get_compiled_path: the
    # first parameter is ``cls``, and without the decorator a call made on
    # the class itself would bind ``name`` to ``cls`` and fail with a
    # missing-argument TypeError. Calls made on an instance are unaffected.
    @classmethod
    def get_compiled_path(cls, name, relative_path):
        """Return ``relative_path`` placed under the ``rpc`` directory.

        ``name`` is not used.
        """
        return os.path.join('rpc', relative_path)

    def parse_from_dict(self, parsed_dict: Dict[str, Any]) -> ParsedRPCNode:
        """Build a ``ParsedRPCNode`` from its dictionary form."""
        return ParsedRPCNode.from_dict(parsed_dict)
12 changes: 7 additions & 5 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import abc
import os
from typing import Dict, Any

import dbt.exceptions
import dbt.flags
Expand All @@ -11,14 +13,13 @@
from dbt.include.global_project import PROJECT_NAME as GLOBAL_PROJECT_NAME
from dbt.utils import coalesce
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.contracts.graph.parsed import ParsedNode
from dbt.contracts.project import ProjectList
from dbt.parser.source_config import SourceConfig
from dbt import deprecations
from dbt import hooks


class BaseParser:
class BaseParser(metaclass=abc.ABCMeta):
def __init__(self, root_project_config, all_projects: ProjectList):
self.root_project_config = root_project_config
self.all_projects = all_projects
Expand Down Expand Up @@ -251,8 +252,9 @@ def _update_parsed_node_info(self, parsed_node, config):
self._mangle_hooks(config_dict)
parsed_node.config = parsed_node.config.from_dict(config_dict)

def _parse_from_dict(self, parsed_dict):
return ParsedNode.from_dict(parsed_dict)
@abc.abstractmethod
def parse_from_dict(self, parsed_dict: Dict[str, Any]) -> Any:
"""Given a dictionary, return the parsed entity for this parser"""

def parse_node(self, node, node_path, package_project_config, tags=None,
fqn_extra=None, fqn=None, snapshot_config=None,
Expand Down Expand Up @@ -281,7 +283,7 @@ def parse_node(self, node, node_path, package_project_config, tags=None,
config, node.to_dict(), node_path, config, tags, fqn,
snapshot_config, column_name
)
parsed_node = self._parse_from_dict(parsed_dict)
parsed_node = self.parse_from_dict(parsed_dict)

self._render_with_context(parsed_node, config)
self._update_parsed_node_info(parsed_node, config)
Expand Down
6 changes: 6 additions & 0 deletions core/dbt/parser/data_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Dict, Any

from dbt.contracts.graph.parsed import ParsedTestNode
from dbt.parser.base_sql import BaseSqlParser
import dbt.utils

Expand All @@ -7,3 +9,7 @@ class DataTestParser(BaseSqlParser):
@classmethod
def get_compiled_path(cls, name, relative_path):
return dbt.utils.get_pseudo_test_path(name, relative_path, 'data_test')

def parse_from_dict(self, parsed_dict: Dict[str, Any]) -> ParsedTestNode:
"""Given a dictionary, return the parsed entity for this parser"""
return ParsedTestNode.from_dict(parsed_dict)
Loading

0 comments on commit 62c3318

Please sign in to comment.