Skip to content

Commit

Permalink
Merge pull request #1830 from fishtown-analytics/feature/improve-rpc-…
Browse files Browse the repository at this point in the history
…compile-performance

Feature/improve rpc compile performance (#1824)
  • Loading branch information
beckjake committed Oct 14, 2019
2 parents 2d100c3 + 43daea0 commit 6287d6d
Show file tree
Hide file tree
Showing 58 changed files with 1,291 additions and 1,158 deletions.
7 changes: 4 additions & 3 deletions core/dbt/adapters/base/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

import dbt.exceptions
import dbt.flags
from dbt.config import Profile
from dbt.contracts.connection import Connection, Identifier, ConnectionState
from dbt.contracts.connection import (
Connection, Identifier, ConnectionState, HasCredentials
)
from dbt.logger import GLOBAL_LOGGER as logger


Expand All @@ -30,7 +31,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
"""
TYPE: str = NotImplemented

def __init__(self, profile: Profile):
def __init__(self, profile: HasCredentials):
self.profile = profile
self.thread_connections: Dict[Hashable, Connection] = {}
self.lock = multiprocessing.RLock()
Expand Down
10 changes: 5 additions & 5 deletions core/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
import dbt.flags

from dbt.clients.agate_helper import empty_table
from dbt.config import RuntimeConfig
from dbt.contracts.graph.manifest import Manifest
from dbt.node_types import NodeType
from dbt.loader import GraphLoader
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import filter_null_values

Expand Down Expand Up @@ -196,8 +194,8 @@ class BaseAdapter(metaclass=AdapterMeta):
# for use in materializations
AdapterSpecificConfigs: FrozenSet[str] = frozenset()

def __init__(self, config: RuntimeConfig):
self.config: RuntimeConfig = config
def __init__(self, config):
self.config = config
self.cache = RelationsCache()
self.connections = self.ConnectionManager(config)
self._internal_manifest_lazy: Optional[Manifest] = None
Expand Down Expand Up @@ -280,7 +278,9 @@ def check_internal_manifest(self) -> Optional[Manifest]:

def load_internal_manifest(self) -> Manifest:
if self._internal_manifest_lazy is None:
manifest = GraphLoader.load_internal(self.config)
# avoid a circular import
from dbt.parser.manifest import load_internal_manifest
manifest = load_internal_manifest(self.config)
self._internal_manifest_lazy = manifest
return self._internal_manifest_lazy

Expand Down
4 changes: 3 additions & 1 deletion core/dbt/adapters/base/plugin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List, Optional, Type

from dbt.config.project import Project
from dbt.adapters.base import BaseAdapter, Credentials


Expand All @@ -18,6 +17,9 @@ def __init__(
include_path: str,
dependencies: Optional[List[str]] = None
):
# avoid an import cycle
from dbt.config.project import Project

self.adapter: Type[BaseAdapter] = adapter
self.credentials: Type[Credentials] = credentials
self.include_path: str = include_path
Expand Down
168 changes: 101 additions & 67 deletions core/dbt/adapters/factory.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,142 @@
import threading
from importlib import import_module
from typing import Type, Dict, TypeVar
from typing import Type, Dict, Any

from dbt.exceptions import RuntimeException
from dbt.include.global_project import PACKAGES
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.contracts.connection import Credentials
from dbt.contracts.connection import Credentials, HasCredentials

from dbt.adapters.base.impl import BaseAdapter
from dbt.adapters.base.plugin import AdapterPlugin

# TODO: we can't import these because they cause an import cycle.
# currently RuntimeConfig needs to figure out default quoting for its adapter.
# We should push that elsewhere when we fixup project/profile stuff
# Instead here are some import loop avoiding-hacks right now. And Profile has
# to call into load_plugin to get credentials, so adapter/relation don't work
RuntimeConfig = TypeVar('RuntimeConfig')
BaseAdapter = TypeVar('BaseAdapter')
BaseRelation = TypeVar('BaseRelation')
# Profile has to call into load_plugin to get credentials, so adapter/relation
# don't work
BaseRelation = Any

ADAPTER_TYPES: Dict[str, Type[BaseAdapter]] = {}

_ADAPTERS: Dict[str, BaseAdapter] = {}
_ADAPTER_LOCK = threading.Lock()
Adapter = BaseAdapter


def get_adapter_class_by_name(adapter_name: str) -> Type[BaseAdapter]:
with _ADAPTER_LOCK:
if adapter_name in ADAPTER_TYPES:
return ADAPTER_TYPES[adapter_name]
class AdpaterContainer:
def __init__(self):
self.lock = threading.Lock()
self.adapters: Dict[str, Adapter] = {}
self.adapter_types: Dict[str, Type[Adapter]] = {}

adapter_names = ", ".join(ADAPTER_TYPES.keys())
def get_adapter_class_by_name(self, name: str) -> Type[Adapter]:
with self.lock:
if name in self.adapter_types:
return self.adapter_types[name]

message = "Invalid adapter type {}! Must be one of {}"
formatted_message = message.format(adapter_name, adapter_names)
raise RuntimeException(formatted_message)
names = ", ".join(self.adapter_types.keys())

message = f"Invalid adapter type {name}! Must be one of {names}"
raise RuntimeException(message)

def get_relation_class_by_name(adapter_name: str) -> Type[BaseRelation]:
adapter = get_adapter_class_by_name(adapter_name)
return adapter.Relation
def get_relation_class_by_name(self, name: str) -> Type[BaseRelation]:
    """Return the ``Relation`` class of the adapter type called ``name``.

    The adapter class itself is resolved by ``get_adapter_class_by_name``,
    so any failure for an unknown ``name`` comes from that call.
    """
    return self.get_adapter_class_by_name(name).Relation

def load_plugin(self, name: str) -> Type[Credentials]:
# this doesn't need a lock: in the worst case we'll overwrite PACKAGES
# and adapter_type entries with the same value, as they're all
# singletons
try:
mod = import_module('.' + name, 'dbt.adapters')
except ImportError as e:
logger.info("Error importing adapter: {}".format(e))
raise RuntimeException(
"Could not find adapter type {}!".format(name)
)
if not hasattr(mod, 'Plugin'):
raise RuntimeException(
f'Could not find plugin in {name} plugin module'
)
plugin: AdapterPlugin = mod.Plugin # type: ignore
plugin_type = plugin.adapter.type()

def load_plugin(adapter_name: str) -> Credentials:
# this doesn't need a lock: in the worst case we'll overwrite PACKAGES and
# _ADAPTER_TYPE entries with the same value, as they're all singletons
try:
mod = import_module('.' + adapter_name, 'dbt.adapters')
except ImportError as e:
logger.info("Error importing adapter: {}".format(e))
raise RuntimeException(
"Could not find adapter type {}!".format(adapter_name)
)
plugin = mod.Plugin
if plugin_type != name:
raise RuntimeException(
f'Expected to find adapter with type named {name}, got '
f'adapter with type {plugin_type}'
)

if plugin.adapter.type() != adapter_name:
raise RuntimeException(
'Expected to find adapter with type named {}, got adapter with '
'type {}'
.format(adapter_name, plugin.adapter.type())
)
with self.lock:
# other methods hold the lock while iterating over adapter_types, so
# we must hold it to add an entry
self.adapter_types[name] = plugin.adapter

with _ADAPTER_LOCK:
# things do hold the lock to iterate over it so we need it to add stuff
ADAPTER_TYPES[adapter_name] = plugin.adapter
PACKAGES[plugin.project_name] = plugin.include_path

PACKAGES[plugin.project_name] = plugin.include_path
for dep in plugin.dependencies:
self.load_plugin(dep)

for dep in plugin.dependencies:
load_plugin(dep)
return plugin.credentials

return plugin.credentials
def register_adapter(self, config: HasCredentials) -> None:
adapter_name = config.credentials.type
adapter_type = self.get_adapter_class_by_name(adapter_name)

with self.lock:
if adapter_name in self.adapters:
# this shouldn't really happen...
return

def get_adapter(config: RuntimeConfig) -> BaseAdapter:
adapter_name = config.credentials.type
adapter: Adapter = adapter_type(config) # type: ignore
self.adapters[adapter_name] = adapter

# Atomically check to see if we already have an adapter
if adapter_name in _ADAPTERS:
return _ADAPTERS[adapter_name]
def lookup_adapter(self, adapter_name: str) -> Adapter:
    """Return the adapter instance stored under ``adapter_name``.

    This is a plain lookup in ``self.adapters``: it does not take the lock
    and never creates an adapter.
    """
    registered = self.adapters
    return registered[adapter_name]

adapter_type = get_adapter_class_by_name(adapter_name)
def reset_adapters(self):
    """Clean up every adapter's connections, then forget all adapters.

    This is useful for tests, which change configs.
    """
    with self.lock:
        registered = self.adapters
        # release connections first so nothing is dropped while still open
        for instance in registered.values():
            instance.cleanup_connections()
        registered.clear()

with _ADAPTER_LOCK:
# check again, in case something was setting it before
if adapter_name in _ADAPTERS:
return _ADAPTERS[adapter_name]
def cleanup_connections(self):
    """Clean up the connections of every adapter, keeping the adapters.

    Unlike ``reset_adapters``, the ``adapters`` mapping is left untouched.
    """
    with self.lock:
        registered = list(self.adapters.values())
        for instance in registered:
            instance.cleanup_connections()

adapter = adapter_type(config)
_ADAPTERS[adapter_name] = adapter
return adapter

FACTORY: AdpaterContainer = AdpaterContainer()


def register_adapter(config: HasCredentials) -> None:
    """Register an adapter for ``config`` with the module-level ``FACTORY``."""
    container = FACTORY
    container.register_adapter(config)


def get_adapter(config: HasCredentials):
    """Return the adapter already registered for ``config``'s credentials type.

    This only looks the adapter up in ``FACTORY``; it does not create one.
    """
    adapter_name = config.credentials.type
    return FACTORY.lookup_adapter(adapter_name)


def reset_adapters():
"""Clear the adapters. This is useful for tests, which change configs.
"""
with _ADAPTER_LOCK:
for adapter in _ADAPTERS.values():
adapter.cleanup_connections()
_ADAPTERS.clear()
FACTORY.reset_adapters()


def cleanup_connections():
"""Only clean up the adapter connections list without resetting the actual
adapters.
"""
with _ADAPTER_LOCK:
for adapter in _ADAPTERS.values():
adapter.cleanup_connections()
FACTORY.cleanup_connections()


def get_adapter_class_by_name(name: str) -> Type[BaseAdapter]:
    """Return the adapter class that ``FACTORY`` holds under ``name``."""
    adapter_cls = FACTORY.get_adapter_class_by_name(name)
    return adapter_cls


def get_relation_class_by_name(name: str) -> Type[BaseRelation]:
    """Return the relation class that ``FACTORY`` resolves for ``name``."""
    relation_cls = FACTORY.get_relation_class_by_name(name)
    return relation_cls


def load_plugin(name: str) -> Type[Credentials]:
    """Load the adapter plugin called ``name`` through ``FACTORY``.

    Returns whatever ``FACTORY.load_plugin`` returns, annotated here as the
    plugin's credentials class.
    """
    credentials_cls = FACTORY.load_plugin(name)
    return credentials_cls
13 changes: 6 additions & 7 deletions core/dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
import dbt.include
import dbt.tracking

from dbt.utils import get_materialization, NodeType, is_type
from dbt.node_types import NodeType
from dbt.linker import Linker

import dbt.context.runtime
import dbt.contracts.project
import dbt.exceptions
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import InjectedCTE, COMPILED_TYPES
from dbt.contracts.graph.parsed import ParsedNode
Expand Down Expand Up @@ -141,7 +140,7 @@ def compile_node(self, node, manifest, extra_context=None):
# data tests get wrapped in count(*)
# TODO : move this somewhere more reasonable
if 'data' in injected_node.tags and \
is_type(injected_node, NodeType.Test):
injected_node.resource_type == NodeType.Test:
injected_node.wrapped_sql = (
"select count(*) as errors "
"from (\n{test_sql}\n) sbq").format(
Expand All @@ -150,14 +149,14 @@ def compile_node(self, node, manifest, extra_context=None):
# don't wrap schema tests or analyses.
injected_node.wrapped_sql = injected_node.injected_sql

elif is_type(injected_node, NodeType.Snapshot):
elif injected_node.resource_type == NodeType.Snapshot:
# unfortunately we do everything automagically for
# snapshots. in the future it'd be nice to generate
# the SQL at the parser level.
pass

elif(is_type(injected_node, NodeType.Model) and
get_materialization(injected_node) == 'ephemeral'):
elif(injected_node.resource_type == NodeType.Model and
injected_node.get_materialization() == 'ephemeral'):
pass

else:
Expand Down Expand Up @@ -220,7 +219,7 @@ def _is_writable(node):
if not node.injected_sql:
return False

if dbt.utils.is_type(node, NodeType.Snapshot):
if node.resource_type == NodeType.Snapshot:
return False

return True
Expand Down
4 changes: 2 additions & 2 deletions core/dbt/config/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from hologram import ValidationError

from dbt.adapters.factory import load_plugin
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.project import ProfileConfig, UserConfig
Expand Down Expand Up @@ -121,6 +120,8 @@ def validate(self):

@staticmethod
def _credentials_from_profile(profile, profile_name, target_name):
# avoid an import cycle
from dbt.adapters.factory import load_plugin
# credentials carry their 'type' in their actual type, not their
# attributes. We do want this in order to pick our Credentials class.
if 'type' not in profile:
Expand All @@ -129,7 +130,6 @@ def _credentials_from_profile(profile, profile_name, target_name):
.format(profile_name, target_name))

typename = profile.pop('type')

try:
cls = load_plugin(typename)
credentials = cls.from_dict(profile)
Expand Down
4 changes: 4 additions & 0 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from dbt.utils import parse_cli_vars
from dbt.source_config import SourceConfig

from dbt.contracts.graph.manifest import ManifestMetadata
from dbt.contracts.project import Project as ProjectContract
from dbt.contracts.project import PackageConfig

Expand Down Expand Up @@ -453,3 +454,6 @@ def validate_version(self):
]
)
raise DbtProjectError(msg)

def get_metadata(self) -> ManifestMetadata:
    """Build a ``ManifestMetadata`` from this object's hashed name."""
    hashed = self.hashed_name()
    return ManifestMetadata(hashed)
2 changes: 1 addition & 1 deletion core/dbt/context/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def env_var(var, default=None):

def debug_here():
import sys
import ipdb
import ipdb # type: ignore
frame = sys._getframe(3)
ipdb.set_trace(frame)

Expand Down
Loading

0 comments on commit 6287d6d

Please sign in to comment.