Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a path: selector to the node selector (#454) #2258

Merged
merged 2 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Support adapter-specific aliases (like `project` and `dataset` on BigQuery) in source definitions. ([#2133](https://github.com/fishtown-analytics/dbt/issues/2133), [#2244](https://github.com/fishtown-analytics/dbt/pull/2244))
- Users can now use jinja as arguments to tests. Test arguments are rendered in the native context and injected into the test execution context directly. ([#2149](https://github.com/fishtown-analytics/dbt/issues/2149), [#2220](https://github.com/fishtown-analytics/dbt/pull/2220))
- Added support for `db_groups` and `autocreate` flags in Redshift configurations. ([#1995](https://github.com/fishtown-analytics/dbt/issues/1995), [#2262](https://github.com/fishtown-analytics/dbt/pull/2262))
- Users can supply paths as arguments to `--models` and `--select`, either explicitly by prefixing with `path:` or implicitly with no prefix. ([#454](https://github.com/fishtown-analytics/dbt/issues/454), [#2258](https://github.com/fishtown-analytics/dbt/pull/2258))

### Fixes
- When a jinja value is undefined, give a helpful error instead of failing with cryptic "cannot pickle ParserMacroCapture" errors ([#2110](https://github.com/fishtown-analytics/dbt/issues/2110), [#2184](https://github.com/fishtown-analytics/dbt/pull/2184))
Expand Down
52 changes: 50 additions & 2 deletions core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Set, Iterable, Union, List, Container, Tuple, Optional

import networkx as nx # type: ignore
Expand All @@ -16,13 +18,24 @@
SELECTOR_DELIMITER = ':'


def _probably_path(value: str) -> bool:
    """Decide if value is probably a path.

    A value counts as a path when it contains a path separator of the
    current platform. Windows has two path separators, so both sep ('\\')
    and altsep ('/') are checked there.

    :param str value: The raw text to inspect.
    :return: True if any platform path separator occurs in value.
    """
    # os.path.altsep is None when the platform has only one separator, so
    # it has to be filtered out before the membership test.
    separators = (os.path.sep, os.path.altsep)
    return any(sep is not None and sep in value for sep in separators)


class SelectionCriteria:
def __init__(self, node_spec: str):
self.raw = node_spec
self.select_children = False
self.select_parents = False
self.select_childrens_parents = False
self.selector_type = SELECTOR_FILTERS.FQN

if node_spec.startswith(SELECTOR_CHILDREN_AND_ANCESTORS):
self.select_childrens_parents = True
Expand All @@ -48,12 +61,19 @@ def __init__(self, node_spec: str):
self.selector_type = SELECTOR_FILTERS(selector_type)
else:
self.selector_value = node_spec
# if the selector value has an OS path separator in it, it can't
# really be a valid file name, so assume it's a path.
if _probably_path(node_spec):
self.selector_type = SELECTOR_FILTERS.PATH
else:
self.selector_type = SELECTOR_FILTERS.FQN


class SELECTOR_FILTERS(str, Enum):
    """The kinds of selector a node spec can use.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (for example ``SELECTOR_FILTERS.PATH == 'path'``).
    """
    FQN = 'fqn'
    TAG = 'tag'
    SOURCE = 'source'
    PATH = 'path'

    def __str__(self):
        # Render as the bare value ('path') instead of the Enum default
        # ('SELECTOR_FILTERS.PATH').
        return self.value
Expand Down Expand Up @@ -219,6 +239,29 @@ def search(self, included_nodes, selector):
yield node


class PathSelector(ManifestSelector):
    """Selector that matches nodes by the file path they were defined in."""
    FILTER = SELECTOR_FILTERS.PATH

    def search(self, included_nodes, selector):
        """Yield all nodes in the graph that match the given path.

        The selector is expanded as a glob pattern under the current working
        directory. A node matches when its original file path, or any parent
        directory of that path, is one of the glob results. Nodes whose
        root_path is not the current working directory are skipped.

        :param included_nodes: Passed through to parsed_nodes() and
            source_nodes() to produce the candidate nodes.
        :param str selector: The path selector
        """
        root = Path.cwd()
        # glob() results are prefixed with root; strip it so they can be
        # compared against each node's original_file_path.
        # NOTE(review): presumably original_file_path is relative to the
        # project root - confirm against the manifest node definitions.
        paths = {match.relative_to(root) for match in root.glob(selector)}
        candidates = chain(
            self.parsed_nodes(included_nodes),
            self.source_nodes(included_nodes),
        )
        for node, real_node in candidates:
            if Path(real_node.root_path) != root:
                continue
            ofp = Path(real_node.original_file_path)
            # Match the file itself, or any directory that contains it.
            if ofp in paths or any(parent in paths for parent in ofp.parents):
                yield node


class InvalidSelectorError(Exception):
pass

Expand All @@ -231,7 +274,12 @@ class MultiSelector:
selector types, including the glob operator, but does not handle any graph
related behavior.
"""
SELECTORS = [QualifiedNameSelector, TagSelector, SourceSelector]
SELECTORS = [
QualifiedNameSelector,
TagSelector,
SourceSelector,
PathSelector,
]

def __init__(self, manifest):
self.manifest = manifest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ def test_postgres_local_dependency(self):
2
)

@use_profile('postgres')
def test_postgres_no_dependency_paths(self):
    """Check which file paths are usable as arguments to --models."""
    self.run_dbt(['deps'])
    self.run_dbt(['seed'])
    # Selecting a model by its path under local_models should work.
    local_path = os.path.join('local_models', 'my_model.sql')
    results = self.run_dbt(
        ['run', '--models', f'+{local_path}']
    )
    # should run the dependency and my_model
    self.assertEqual(len(results), 2)

    # Selecting the dependency's model by path should match nothing.
    dep_path = os.path.join('models', 'model_to_import.sql')
    results = self.run_dbt(
        ['run', '--models', f'+{dep_path}'],
    )
    # should not run the dependency, because it "doesn't exist".
    self.assertEqual(len(results), 0)


class TestMissingDependency(DBTIntegrationTest):
@property
Expand Down
17 changes: 13 additions & 4 deletions test/integration/007_graph_selection_tests/test_graph_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def test__snowflake__specific_model_and_children(self):
self.assertFalse('BASE_USERS' in created_models)
self.assertFalse('EMAILS' in created_models)


@use_profile('postgres')
def test__postgres__specific_model_and_parents(self):
self.run_sql_file("seed.sql")
Expand Down Expand Up @@ -143,13 +142,12 @@ def test__snowflake__specific_model_and_parents(self):
self.assertFalse('BASE_USERS' in created_models)
self.assertFalse('EMAILS' in created_models)


@use_profile('postgres')
def test__postgres__specific_model_with_exclusion(self):
self.run_sql_file("seed.sql")

results = self.run_dbt(
['run', '--models', '+users_rollup', '--exclude', 'users_rollup']
['run', '--models', '+users_rollup', '--exclude', 'models/users_rollup.sql']
)
self.assertEqual(len(results), 1)

Expand Down Expand Up @@ -188,6 +186,17 @@ def test__postgres__locally_qualified_name(self):
self.assertIn('nested_users', created_models)
self.assert_correct_schemas()

results = self.run_dbt(['run', '--models', 'models/test/subdir*'])
self.assertEqual(len(results), 2)

created_models = self.get_models_in_schema()
self.assertNotIn('users_rollup', created_models)
self.assertNotIn('base_users', created_models)
self.assertNotIn('emails', created_models)
self.assertIn('subdir', created_models)
self.assertIn('nested_users', created_models)
self.assert_correct_schemas()

@use_profile('postgres')
def test__postgres__childrens_parents(self):
self.run_sql_file("seed.sql")
Expand Down Expand Up @@ -218,7 +227,7 @@ def test__postgres__more_childrens_parents(self):
@use_profile('snowflake')
def test__snowflake__skip_intermediate(self):
self.run_sql_file("seed.sql")
results = self.run_dbt(['run', '--models', '@users'])
results = self.run_dbt(['run', '--models', '@models/users.sql'])
# base_users, emails, users_rollup, users_rollup_dependency
self.assertEqual(len(results), 4)

Expand Down