Skip to content

Commit

Permalink
[#3725] Switch to full reparse on partial parsing exceptions. Log and report exception information.
Browse files Browse the repository at this point in the history
  • Loading branch information
gshank committed Aug 13, 2021
1 parent 664f658 commit af16c74
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 15 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ Contributors:
- [@jmriego](https://github.com/jmriego) ([#3526](https://github.com/dbt-labs/dbt/pull/3526))
- [@danielefrigo](https://github.com/danielefrigo) ([#3547](https://github.com/dbt-labs/dbt/pull/3547))


## dbt 0.20.2 (Release TBD)

### Under the hood
- Switch to full reparse on partial parsing exceptions. Log and report exception information. ([#3725](https://github.com/dbt-labs/dbt/issues/3725), [#3733](https://github.com/dbt-labs/dbt/pull/3733))


## dbt 0.20.1 (August 11, 2021)

## dbt 0.20.1rc1 (August 02, 2021)
Expand Down
52 changes: 43 additions & 9 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from dataclasses import field
import os
import traceback
from typing import (
Dict, Optional, Mapping, Callable, Any, List, Type, Union, Tuple
)
Expand Down Expand Up @@ -74,6 +75,7 @@ class ReparseReason(StrEnum):
deps_changed = '05_deps_changed'
project_config_changed = '06_project_config_changed'
load_file_failure = '07_load_file_failure'
exception = '08_exception'


# Part of saved performance info
Expand Down Expand Up @@ -199,10 +201,6 @@ def load(self):
# Read files creates a dictionary of projects to a dictionary
# of parsers to lists of file strings. The file strings are
# used to get the SourceFiles from the manifest files.
# In the future the loaded files will be used to control
# partial parsing, but right now we're just moving the
# file loading out of the individual parsers and doing it
# all at once.
start_read_files = time.perf_counter()
project_parser_files = {}
for project in self.all_projects.values():
Expand All @@ -214,15 +212,51 @@ def load(self):
if self.saved_manifest is not None:
partial_parsing = PartialParsing(self.saved_manifest, self.manifest.files)
skip_parsing = partial_parsing.skip_parsing()
if not skip_parsing:
if skip_parsing:
# nothing changed, so we don't need to generate project_parser_files
self.manifest = self.saved_manifest
else:
# create child_map and parent_map
self.saved_manifest.build_parent_and_child_maps()
# files are different, we need to create a new set of
# project_parser_files.
project_parser_files = partial_parsing.get_parsing_files()
self.partially_parsing = True

self.manifest = self.saved_manifest
try:
project_parser_files = partial_parsing.get_parsing_files()
self.partially_parsing = True
self.manifest = self.saved_manifest
except Exception:
# project_parser_files should still be the full set and self.manifest is
# still the new manifest, since get_parsing_files failed
logger.info("Partial parsing enabled but an error occurred. "
"Switching to a full re-parse.")

# Get traceback info
tb_info = traceback.format_exc(10)
formatted_lines = tb_info.splitlines()
(_, line, method) = formatted_lines[-3].split(', ')
exc_info = {
"traceback": tb_info,
"exception": formatted_lines[-1],
"code": formatted_lines[-2],
"location": f"{line} {method}",
}

# get file info for local logs
parse_file_type = None
file_id = partial_parsing.processing_file
if file_id and file_id in self.manifest.files:
old_file = self.manifest.files[file_id]
parse_file_type = old_file.parse_file_type
logger.debug(f"Partial parsing exception processing file {file_id}")
file_dict = old_file.to_dict()
logger.debug(f"PP file: {file_dict}")
exc_info['parse_file_type'] = parse_file_type
logger.debug(f"PP exception info: {exc_info}")

# Send event
if dbt.tracking.active_user is not None:
exc_info['full_reparse_reason'] = ReparseReason.exception
dbt.tracking.track_partial_parser(exc_info)

if self.manifest._parsing_info is None:
self.manifest._parsing_info = ParsingInfo()
Expand Down
12 changes: 7 additions & 5 deletions core/dbt/parser/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.parsed import ParsedModelNode
import dbt.flags as flags
import dbt.tracking
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FileBlock
Expand Down Expand Up @@ -108,11 +109,12 @@ def render_update(
# no false positives or misses, we can expect the number model
# files parseable by the experimental parser to match our internal
# testing.
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})
if dbt.tracking.active_user is not None: # None in some tests
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})

# if the --use-experimental-parser flag was set, and the experimental parser succeeded
elif not isinstance(experimentally_parsed, Exception):
Expand Down
6 changes: 6 additions & 0 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, saved_manifest: Manifest, new_files: MutableMapping[str, AnyS
self.deleted_manifest = Manifest()
self.macro_child_map: Dict[str, List[str]] = {}
self.build_file_diff()
self.processing_file = None

def skip_parsing(self):
return (
Expand Down Expand Up @@ -118,16 +119,21 @@ def get_parsing_files(self):
# Need to add new files first, because changes in schema files
# might refer to them
for file_id in self.file_diff['added']:
self.processing_file = file_id
self.add_to_saved(file_id)
# Need to process schema files next, because the dictionaries
# need to be in place for handling SQL file changes
for file_id in self.file_diff['changed_schema_files']:
self.processing_file = file_id
self.change_schema_file(file_id)
for file_id in self.file_diff['deleted_schema_files']:
self.processing_file = file_id
self.delete_schema_file(file_id)
for file_id in self.file_diff['deleted']:
self.processing_file = file_id
self.delete_from_saved(file_id)
for file_id in self.file_diff['changed']:
self.processing_file = file_id
self.update_in_saved(file_id)
return self.project_parser_files

Expand Down
2 changes: 1 addition & 1 deletion core/dbt/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
LOAD_ALL_TIMING_SPEC = 'iglu:com.dbt/load_all_timing/jsonschema/1-0-3'
RESOURCE_COUNTS = 'iglu:com.dbt/resource_counts/jsonschema/1-0-0'
EXPERIMENTAL_PARSER = 'iglu:com.dbt/experimental_parser/jsonschema/1-0-0'
PARTIAL_PARSER = 'iglu:com.dbt/partial_parser/jsonschema/1-0-0'
PARTIAL_PARSER = 'iglu:com.dbt/partial_parser/jsonschema/1-0-1'
DBT_INVOCATION_ENV = 'DBT_INVOCATION_ENV'


Expand Down

0 comments on commit af16c74

Please sign in to comment.