Skip to content

Commit

Permalink
Add tracking event for full re-parse reasoning (#3652)
Browse files Browse the repository at this point in the history
* add tracking event for full reparse reason

* update changelog
  • Loading branch information
Kyle Wigley authored Jul 30, 2021
1 parent 2adf982 commit 2679792
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
- Better interaction between `dbt init` and adapters. Avoid raising errors while initializing a project ([#2814](https://github.com/fishtown-analytics/dbt/pull/2814), [#3483](https://github.com/fishtown-analytics/dbt/pull/3483))
- Update `create_adapter_plugins` script to include latest accessories, and stay up to date with latest dbt-core version ([#3002](https://github.com/fishtown-analytics/dbt/issues/3002), [#3509](https://github.com/fishtown-analytics/dbt/pull/3509))
- Scrub environment secrets from logs and console output ([#3617](https://github.com/dbt-labs/dbt/pull/3617))
- Add tracking for determine why `dbt` needs to re-parse entire project when partial parsing is enabled ([#3572](https://github.com/dbt-labs/dbt/issues/3572), [#3652](https://github.com/dbt-labs/dbt/pull/3652))

### Dependencies
- Require `werkzeug>=1`
Expand Down
36 changes: 30 additions & 6 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import field
import os
from typing import (
Dict, Optional, Mapping, Callable, Any, List, Type, Union
Dict, Optional, Mapping, Callable, Any, List, Type, Union, Tuple
)
import time

Expand Down Expand Up @@ -59,13 +59,23 @@
from dbt.ui import warning_tag
from dbt.version import __version__

from dbt.dataclass_schema import dbtClassMixin
from dbt.dataclass_schema import StrEnum, dbtClassMixin

PARTIAL_PARSE_FILE_NAME = 'partial_parse.msgpack'
PARSING_STATE = DbtProcessState('parsing')
DEFAULT_PARTIAL_PARSE = False


class ReparseReason(StrEnum):
version_mismatch = '01_version_mismatch'
file_not_found = '02_file_not_found'
vars_changed = '03_vars_changed'
profile_changed = '04_profile_changed'
deps_changed = '05_deps_changed'
project_config_changed = '06_project_config_changed'
load_file_failure = '07_load_file_failure'


# Part of saved performance info
@dataclass
class ParserInfo(dbtClassMixin):
Expand Down Expand Up @@ -441,24 +451,28 @@ def write_manifest_for_partial_parse(self):
except Exception:
raise

def matching_parse_results(self, manifest: Manifest) -> bool:
def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
"""Compare the global hashes of the read-in parse results' values to
the known ones, and return if it is ok to re-use the results.
"""
valid = True
reparse_reason = None

if manifest.metadata.dbt_version != __version__:
logger.info("Unable to do partial parsing because of a dbt version mismatch")
return False # If the version is wrong, the other checks might not work
# If the version is wrong, the other checks might not work
return False, ReparseReason.version_mismatch
if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
logger.info("Unable to do partial parsing because config vars, "
"config profile, or config target have changed")
valid = False
reparse_reason = ReparseReason.vars_changed
if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
# Note: This should be made more granular. We shouldn't need to invalidate
# partial parsing if a non-used profile section has changed.
logger.info("Unable to do partial parsing because profile has changed")
valid = False
reparse_reason = ReparseReason.profile_changed

missing_keys = {
k for k in self.manifest.state_check.project_hashes
Expand All @@ -467,6 +481,7 @@ def matching_parse_results(self, manifest: Manifest) -> bool:
if missing_keys:
logger.info("Unable to do partial parsing because a project dependency has been added")
valid = False
reparse_reason = ReparseReason.deps_changed

for key, new_value in self.manifest.state_check.project_hashes.items():
if key in manifest.state_check.project_hashes:
Expand All @@ -475,7 +490,8 @@ def matching_parse_results(self, manifest: Manifest) -> bool:
logger.info("Unable to do partial parsing because "
"a project config has changed")
valid = False
return valid
reparse_reason = ReparseReason.project_config_changed
return valid, reparse_reason

def _partial_parse_enabled(self):
# if the CLI is set, follow that
Expand All @@ -494,6 +510,8 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
path = os.path.join(self.root_project.target_path,
PARTIAL_PARSE_FILE_NAME)

reparse_reason = None

if os.path.exists(path):
try:
with open(path, 'rb') as fp:
Expand All @@ -502,16 +520,22 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
# keep this check inside the try/except in case something about
# the file has changed in weird ways, perhaps due to being a
# different version of dbt
if self.matching_parse_results(manifest):
is_partial_parseable, reparse_reason = self.is_partial_parsable(manifest)
if is_partial_parseable:
return manifest
except Exception as exc:
logger.debug(
'Failed to load parsed file from disk at {}: {}'
.format(path, exc),
exc_info=True
)
reparse_reason = ReparseReason.load_file_failure
else:
logger.info(f"Unable to do partial parsing because {path} not found")
reparse_reason = ReparseReason.file_not_found

# this event is only fired if a full reparse is needed
dbt.tracking.track_partial_parser({'full_reparse_reason': reparse_reason})

return None

Expand Down
17 changes: 16 additions & 1 deletion core/dbt/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
LOAD_ALL_TIMING_SPEC = 'iglu:com.dbt/load_all_timing/jsonschema/1-0-3'
RESOURCE_COUNTS = 'iglu:com.dbt/resource_counts/jsonschema/1-0-0'
EXPERIMENTAL_PARSER = 'iglu:com.dbt/experimental_parser/jsonschema/1-0-0'
PARTIAL_PARSER = 'iglu:com.dbt/partial_parser/jsonschema/1-0-0'
DBT_INVOCATION_ENV = 'DBT_INVOCATION_ENV'


Expand Down Expand Up @@ -426,7 +427,7 @@ def track_invalid_invocation(
def track_experimental_parser_sample(options):
context = [SelfDescribingJson(EXPERIMENTAL_PARSER, options)]
assert active_user is not None, \
'Cannot track project loading time when active user is None'
'Cannot track experimental parser info when active user is None'

track(
active_user,
Expand All @@ -437,6 +438,20 @@ def track_experimental_parser_sample(options):
)


def track_partial_parser(options):
context = [SelfDescribingJson(PARTIAL_PARSER, options)]
assert active_user is not None, \
'Cannot track partial parser info when active user is None'

track(
active_user,
category='dbt',
action='partial_parser',
label=active_user.invocation_id,
context=context
)


def flush():
logger.debug("Flushing usage events")
tracker.flush()
Expand Down
12 changes: 7 additions & 5 deletions test/unit/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
import dbt.parser.manifest
from dbt import tracking
from dbt.contracts.files import SourceFile, FileHash, FilePath
from dbt.contracts.graph.manifest import Manifest, MacroManifest, ManifestStateCheck
from dbt.parser.base import BaseParser
from dbt.contracts.graph.manifest import MacroManifest, ManifestStateCheck
from dbt.graph import NodeSelector, parse_difference

try:
Expand Down Expand Up @@ -340,8 +339,11 @@ def test__partial_parse(self):
loader = dbt.parser.manifest.ManifestLoader(config, {config.project_name: config})
loader.manifest = manifest.deepcopy()

self.assertTrue(loader.matching_parse_results(manifest))
is_partial_parsable, _ = loader.is_partial_parsable(manifest)
self.assertTrue(is_partial_parsable)
manifest.metadata.dbt_version = '0.0.1a1'
self.assertFalse(loader.matching_parse_results(manifest))
is_partial_parsable, _ = loader.is_partial_parsable(manifest)
self.assertFalse(is_partial_parsable)
manifest.metadata.dbt_version = '99999.99.99'
self.assertFalse(loader.matching_parse_results(manifest))
is_partial_parsable, _ = loader.is_partial_parsable(manifest)
self.assertFalse(is_partial_parsable)

0 comments on commit 2679792

Please sign in to comment.