Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make node selection O(n) #1615

Merged
merged 4 commits into from
Jul 17, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## dbt 0.14.1

### Breaking changes
- the undocumented "graph" variable was removed from the parsing context ([#1615](https://github.com/fishtown-analytics/dbt/pull/1615))

## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)

### Overview
Expand Down
2 changes: 0 additions & 2 deletions core/dbt/context/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,6 @@ def generate_base(model, model_dict, config, manifest, source_config,
"exceptions": dbt.exceptions.wrapped_exports(model),
"execute": provider.execute,
"flags": dbt.flags,
# TODO: Do we have to leave this in?
beckjake marked this conversation as resolved.
Show resolved Hide resolved
"graph": manifest.to_flat_graph(),
"log": log,
"model": model_dict,
"modules": get_context_modules(),
Expand Down
16 changes: 0 additions & 16 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,22 +379,6 @@ def patch_nodes(self, patches):
'not found or is disabled').format(patch.name)
)

def to_flat_graph(self):
"""Convert the parsed manifest to the 'flat graph' that the compiler
expects.

Kind of hacky note: everything in the code is happy to deal with
macros as ParsedMacro objects (in fact, it's been changed to require
that), so those can just be returned without any work. Nodes sadly
require a lot of work on the compiler side.

Ideally in the future we won't need to have this method.
"""
return {
'nodes': {k: v.to_shallow_dict() for k, v in self.nodes.items()},
'macros': self.macros,
}

def __getattr__(self, name):
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, name)
Expand Down
47 changes: 11 additions & 36 deletions core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import networkx as nx
from dbt.logger import GLOBAL_LOGGER as logger

from dbt.utils import is_enabled, get_materialization, coalesce
from dbt.utils import is_enabled, coalesce
from dbt.node_types import NodeType
import dbt.exceptions

Expand Down Expand Up @@ -315,40 +315,6 @@ def get_selected(self, include, exclude, resource_types, tags, required):

return filtered_nodes

def is_ephemeral_model(self, node):
is_model = node.get('resource_type') == NodeType.Model
is_ephemeral = get_materialization(node) == 'ephemeral'
return is_model and is_ephemeral

def get_ancestor_ephemeral_nodes(self, selected_nodes):
node_names = {}
for node_id in selected_nodes:
if node_id not in self.manifest.nodes:
continue
node = self.manifest.nodes[node_id]
# sources don't have ancestors and this results in a silly select()
if node.resource_type == NodeType.Source:
continue
node_names[node_id] = node.name

include_spec = [
'+{}'.format(node_names[node])
for node in selected_nodes if node in node_names
]
if not include_spec:
return set()

all_ancestors = self.select_nodes(self.linker.graph, include_spec, [])

res = []
for ancestor in all_ancestors:
ancestor_node = self.manifest.nodes.get(ancestor, None)

if ancestor_node and self.is_ephemeral_model(ancestor_node):
res.append(ancestor)

return set(res)

def select(self, query):
include = query.get('include')
exclude = query.get('exclude')
Expand All @@ -359,6 +325,15 @@ def select(self, query):
selected = self.get_selected(include, exclude, resource_types, tags,
required)

addins = self.get_ancestor_ephemeral_nodes(selected)
# we used to carefully go through all node ancestors and add those if
# they were ephemeral. Sadly, the algorithm we used ended up being
# O(n^2). Instead, since ephemeral nodes are almost free, just add all
# ephemeral nodes in the graph.
# someday at large enough scale we might want to prune it to only be
# ancestors of the selected nodes so we can skip the compile.
addins = {
uid for uid, node in self.manifest.nodes.items()
if node.is_ephemeral_model
}

return selected | addins
33 changes: 0 additions & 33 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,19 +258,6 @@ def test__nested_nodes(self):
[]
)

def test__to_flat_graph(self):
nodes = copy.copy(self.nested_nodes)
manifest = Manifest(nodes=nodes, macros={}, docs={},
generated_at=timestring(), disabled=[])
flat_graph = manifest.to_flat_graph()
flat_nodes = flat_graph['nodes']
self.assertEqual(set(flat_graph), set(['nodes', 'macros']))
self.assertEqual(flat_graph['macros'], {})
self.assertEqual(set(flat_nodes), set(self.nested_nodes))
expected_keys = set(ParsedNode.SCHEMA['required']) | {'agate_table'}
for node in flat_nodes.values():
self.assertEqual(set(node), expected_keys)

@mock.patch.object(tracking, 'active_user')
def test_get_metadata(self, mock_user):
mock_user.id = 'cfc9500f-dc7f-4c83-9ea7-2c581c1b38cf'
Expand Down Expand Up @@ -622,23 +609,3 @@ def test__nested_nodes(self):
child_map['model.snowplow.events'],
[]
)

def test__to_flat_graph(self):
nodes = copy.copy(self.nested_nodes)
manifest = Manifest(nodes=nodes, macros={}, docs={},
generated_at=timestring(), disabled=[])
flat_graph = manifest.to_flat_graph()
flat_nodes = flat_graph['nodes']
self.assertEqual(set(flat_graph), set(['nodes', 'macros']))
self.assertEqual(flat_graph['macros'], {})
self.assertEqual(set(flat_nodes), set(self.nested_nodes))
parsed_keys = set(ParsedNode.SCHEMA['required']) | {'agate_table'}
compiled_keys = set(CompiledNode.SCHEMA['required']) | {'agate_table'}
compiled_count = 0
for node in flat_nodes.values():
if node.get('compiled'):
self.assertEqual(set(node), compiled_keys)
compiled_count += 1
else:
self.assertEqual(set(node), parsed_keys)
self.assertEqual(compiled_count, 2)
152 changes: 73 additions & 79 deletions test/unit/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1692,88 +1692,82 @@ def test__process_refs__packages(self):

processed_manifest = ParserUtils.process_refs(manifest, 'root')
self.assertEqual(
processed_manifest.to_flat_graph(),
processed_manifest.serialize()['nodes'],
{
'macros': {},
'nodes': {
'model.snowplow.events': {
'name': 'events',
'alias': 'events',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.snowplow.events',
'fqn': ['snowplow', 'events'],
'empty': False,
'package_name': 'snowplow',
'refs': [],
'sources': [],
'depends_on': {
'nodes': [],
'macros': []
},
'config': self.disabled_config,
'tags': [],
'path': 'events.sql',
'original_file_path': 'events.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'agate_table': None,
'columns': {},
'description': '',
'model.snowplow.events': {
'name': 'events',
'alias': 'events',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.snowplow.events',
'fqn': ['snowplow', 'events'],
'empty': False,
'package_name': 'snowplow',
'refs': [],
'sources': [],
'depends_on': {
'nodes': [],
'macros': []
},
'model.root.events': {
'name': 'events',
'alias': 'events',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.root.events',
'fqn': ['root', 'events'],
'empty': False,
'package_name': 'root',
'refs': [],
'sources': [],
'depends_on': {
'nodes': [],
'macros': []
},
'config': self.model_config,
'tags': [],
'path': 'events.sql',
'original_file_path': 'events.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'agate_table': None,
'columns': {},
'description': '',
'config': self.disabled_config,
'tags': [],
'path': 'events.sql',
'original_file_path': 'events.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'columns': {},
'description': '',
},
'model.root.events': {
'name': 'events',
'alias': 'events',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.root.events',
'fqn': ['root', 'events'],
'empty': False,
'package_name': 'root',
'refs': [],
'sources': [],
'depends_on': {
'nodes': [],
'macros': []
},
'model.root.dep': {
'name': 'dep',
'alias': 'dep',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.root.dep',
'fqn': ['root', 'dep'],
'empty': False,
'package_name': 'root',
'refs': [['events']],
'sources': [],
'depends_on': {
'nodes': ['model.root.events'],
'macros': []
},
'config': self.model_config,
'tags': [],
'path': 'multi.sql',
'original_file_path': 'multi.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'agate_table': None,
'columns': {},
'description': '',
}
'config': self.model_config,
'tags': [],
'path': 'events.sql',
'original_file_path': 'events.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'columns': {},
'description': '',
},
'model.root.dep': {
'name': 'dep',
'alias': 'dep',
'database': 'test',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.root.dep',
'fqn': ['root', 'dep'],
'empty': False,
'package_name': 'root',
'refs': [['events']],
'sources': [],
'depends_on': {
'nodes': ['model.root.events'],
'macros': []
},
'config': self.model_config,
'tags': [],
'path': 'multi.sql',
'original_file_path': 'multi.sql',
'root_path': get_os_path('/usr/src/app'),
'raw_sql': 'does not matter',
'columns': {},
'description': '',
}
}
)
Expand Down