Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding a new column is not a breaking contract change #7333

Merged
merged 5 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230412-133438.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Adding a new column is not a breaking contract change
time: 2023-04-12T13:34:38.231881+02:00
custom:
Author: jtcohen6
Issue: "7332"
60 changes: 45 additions & 15 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.functions import warn_or_error
from dbt.exceptions import ParsingError, InvalidAccessTypeError, ModelContractError
from dbt.exceptions import ParsingError, InvalidAccessTypeError, ContractBreakingChangeError
from dbt.events.types import (
SeedIncreased,
SeedExceedsLimitSamePath,
Expand Down Expand Up @@ -539,29 +539,59 @@ def build_contract_checksum(self):
self.contract.checksum = hashlib.new("sha256", data).hexdigest()

def same_contract(self, old) -> bool:
# If the contract wasn't previously enforced:
if old.contract.enforced is False and self.contract.enforced is False:
# Not a change
# No change -- same_contract: True
return True
if old.contract.enforced is False and self.contract.enforced is True:
# A change, but not a breaking change
# Now it's enforced. This is a change, but not a breaking change -- same_contract: False
return False

breaking_change_reasons = []
# Otherwise: The contract was previously enforced, and we need to check for changes.
# Happy path: The contract is still being enforced, and the checksums are identical.
if self.contract.enforced is True and self.contract.checksum == old.contract.checksum:
# No change -- same_contract: True
return True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is wrong. If old.contract.enforced is True and self.contract.enforced is False it's a breaking change even when checksums are still equal, and this will always return True for that case. Also the "build_contract_checksum" only needs to run for self.contract.enforced is False, and the line above runs it for both True and False.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If old.contract.enforced is True and self.contract.enforced is False it's a breaking change even when checksums are still equal, and this will always return True for that case.

Yes! Good point! Let me add a test for this

Also the "build_contract_checksum" only needs to run for self.contract.enforced is False, and the line above runs it for both True and False.

Per thread above, I don't think we need to call build_contract_checksum at any point within same_contract

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per other thread, we want to capture that the contract has been disabled AND the columns are different.

Copy link
Contributor Author

@jtcohen6 jtcohen6 Apr 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If old.contract.enforced is True and self.contract.enforced is False it's a breaking change even when checksums are still equal, and this will always return True for that case.

After looking more closely, the current formulation of the logic does work, because self.contract.checksum is None!

ipdb> self.contract.checksum == old.contract.checksum
False
ipdb> self.contract
Contract(enforced=False, checksum=None)
ipdb> old.contract
Contract(enforced=True, checksum='1698cf5a415f00ae0dee2c6e97bb51c020d46955847b2e9cec53a8e40d1afb13')

I do agree that a more explicit condition for this would be better, rather than implicitly depending on that behavior to remain the case, so I've added it in


# Otherwise: There has been a change.
# We need to determine if it is a **breaking** change.
# These are the categories of breaking changes:
contract_enforced_disabled: bool = False
columns_removed: List[str] = []
column_type_changes: List[Tuple[str, str, str]] = []

if old.contract.enforced is True and self.contract.enforced is False:
# Breaking change: throw an error
# Note: we don't have contract.checksum for current node, so build
self.build_contract_checksum()
breaking_change_reasons.append("contract has been disabled")
# Breaking change: the contract was previously enforced, and it no longer is
contract_enforced_disabled = True

# Next, compare each column from the previous contract (old.columns)
for key, value in sorted(old.columns.items()):
# Has this column been removed?
if key not in self.columns.keys():
columns_removed.append(value.name)
# Has this column's data type changed?
elif value.data_type != self.columns[key].data_type:
column_type_changes.append(
(str(value.name), str(value.data_type), str(self.columns[key].data_type))
)

if self.contract.checksum != old.contract.checksum:
# Breaking change, throw error
breaking_change_reasons.append("column definitions have changed")
# If a column has been added, it will be missing in the old.columns, and present in self.columns
# That's a change (caught by the different checksums), but not a breaking change

if breaking_change_reasons:
raise (ModelContractError(reasons=" and ".join(breaking_change_reasons), node=self))
# Did we find any changes that we consider breaking? If so, that's an error
if contract_enforced_disabled or columns_removed or column_type_changes:
raise (
ContractBreakingChangeError(
contract_enforced_disabled=contract_enforced_disabled,
columns_removed=columns_removed,
column_type_changes=column_type_changes,
node=self,
)
)

# Otherwise, though we didn't find any *breaking* changes, the contract has still changed -- same_contract: False
else:
# no breaking changes
return True
return False


# ====================================
Expand Down
36 changes: 29 additions & 7 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,22 +207,44 @@ def _fix_dupe_msg(self, path_1: str, path_2: str, name: str, type_name: str) ->
)


class ModelContractError(DbtRuntimeError):
class ContractBreakingChangeError(DbtRuntimeError):
CODE = 10016
MESSAGE = "Contract Error"
MESSAGE = "Breaking Change to Contract"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this, but I don't actually know where this MESSAGE appears. Should, this and the type below, be Contract Breaking Change Error for consistency with the exception class name?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it appears in a generic message that packages up compilation errors, but I'm not sure it does anywhere else. Let me look for it...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few exceptions use it to construct the actual "message", but mostly it's not. Another thing we might want to clean up at some point. So I don't think it matters here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other message use a "type" property to construct a "{type} Error", but mostly DbtInternalError


def __init__(self, reasons, node=None):
self.reasons = reasons
def __init__(
self, contract_enforced_disabled, columns_removed, column_type_changes, node=None
):
self.contract_enforced_disabled = contract_enforced_disabled
self.columns_removed = columns_removed
self.column_type_changes = column_type_changes
super().__init__(self.message(), node)

@property
def type(self):
return "Contract"
return "Breaking Change to Contract"

def message(self):
breaking_changes = []
if self.contract_enforced_disabled:
breaking_changes.append("The contract's enforcement has been disabled.")
if self.columns_removed:
columns_removed_str = "\n - ".join(self.columns_removed)
breaking_changes.append(f"Columns were removed: \n - {columns_removed_str}")
if self.column_type_changes:
column_type_changes_str = "\n - ".join(
[f"{c[0]} ({c[1]} -> {c[2]})" for c in self.column_type_changes]
)
breaking_changes.append(
f"Columns with data_type changes: \n - {column_type_changes_str}"
)

reasons = "\n\n".join(breaking_changes)

return (
f"There is a breaking change in the model contract because {self.reasons}; "
"you may need to create a new version. See: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
"While comparing to previous project state, dbt detected a breaking change to an enforced contract."
f"\n\n{reasons}\n\n"
"Consider making an additive (non-breaking) change instead, if possible.\n"
"Otherwise, create a new model version: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
)


Expand Down
23 changes: 23 additions & 0 deletions tests/functional/defer_state/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,29 @@
data_type: text
"""

disabled_contract_schema_yml = """
version: 2
models:
- name: view_model
columns:
- name: id
tests:
- unique:
severity: error
- not_null
- name: name
- name: table_model
columns:
- name: id
data_type: integer
tests:
- unique:
severity: error
- not_null
- name: name
data_type: text
"""

exposures_yml = """
version: 2
exposures:
Expand Down
51 changes: 43 additions & 8 deletions tests/functional/defer_state/test_modified_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from dbt.tests.util import run_dbt, update_config_file, write_file, get_manifest

from dbt.exceptions import CompilationError, ModelContractError
from dbt.exceptions import CompilationError, ContractBreakingChangeError

from tests.functional.defer_state.fixtures import (
seed_csv,
Expand All @@ -20,6 +20,7 @@
infinite_macros_sql,
contract_schema_yml,
modified_contract_schema_yml,
disabled_contract_schema_yml,
)


Expand Down Expand Up @@ -302,12 +303,17 @@ def test_changed_contract(self, project):
second_contract_checksum = model.contract.checksum
# double check different contract_checksums
assert first_contract_checksum != second_contract_checksum
with pytest.raises(ModelContractError):
with pytest.raises(ContractBreakingChangeError):
results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])

# Go back to schema file without contract. Should raise an error.
write_file(schema_yml, "models", "schema.yml")
with pytest.raises(ModelContractError):
with pytest.raises(ContractBreakingChangeError):
results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])

# Now disable the contract. Should raise an error.
write_file(disabled_contract_schema_yml, "models", "schema.yml")
with pytest.raises(ContractBreakingChangeError):
results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])


Expand All @@ -320,6 +326,11 @@ def test_changed_contract(self, project):
select 1 as id
"""

modified_my_model_non_breaking_sql = """
-- a comment
select 1 as id, 'blue' as color
"""

my_model_yml = """
models:
- name: my_model
Expand All @@ -339,7 +350,20 @@ def test_changed_contract(self, project):
enforced: true
columns:
- name: id
data_type: string
data_type: text
"""

modified_my_model_non_breaking_yml = """
models:
- name: my_model
config:
contract:
enforced: true
columns:
- name: id
data_type: int
- name: color
data_type: text
"""


Expand All @@ -361,10 +385,21 @@ def test_modified_body_and_contract(self, project):
assert len(results) == 1
self.copy_state()

# Change both body and contract
# Change both body and contract in a *breaking* way (= changing data_type of existing column)
write_file(modified_my_model_yml, "models", "my_model.yml")
write_file(modified_my_model_sql, "models", "my_model.sql")

# should raise even without specifying state:modified.contract
with pytest.raises(ModelContractError):
results = run_dbt(["run", "--models", "state:modified", "--state", "./state"])
# Should raise even without specifying state:modified.contract
with pytest.raises(ContractBreakingChangeError):
results = run_dbt(["run", "-s", "state:modified", "--state", "./state"])

# Change both body and contract in a *non-breaking* way (= adding a new column)
write_file(modified_my_model_non_breaking_yml, "models", "my_model.yml")
write_file(modified_my_model_non_breaking_sql, "models", "my_model.sql")

# Should pass
run_dbt(["run", "-s", "state:modified", "--state", "./state"])

# The model's contract has changed, even if non-breaking, so it should be selected by 'state:modified.contract'
results = run_dbt(["list", "-s", "state:modified.contract", "--state", "./state"])
assert results == ["test.my_model"]