Skip to content

Commit

Permalink
Optimize CI builds for unimportant pyproject.toml changes (apache#37305)
Browse files Browse the repository at this point in the history
When dependencies change in pyproject.toml, we should run build
with `upgrade-to-newer-dependencies`, however we should not run
it when dependencies in pyproject.toml do not change.

That saves about 30 minutes of elapsed time of the build and heavily
limits the number of tests executed. It takes about 30 minutes now
to build the image that has "upgrade-to-newer-dependencies", we only
usually run default Python image in such case and we do not run many
tests that are not needed (for example K8S tests).

This PR optimizes out the case where non-dependency changes only are
done in pyproject.toml. This happens for example when you only change
docstrings and remove ruff rules in [[tools.ruff]] section.

We compare the dependencies and optional dependencies coming from
the change and only when there is a change in those, we set the
`upgrade-to-newer-dependencies` flag. We also print what changed.

Similarly full-tests-needed is only set when build-system changes
or when dependencies change, because then we want to make sure new
dependencies are working on all Python versions.
  • Loading branch information
potiuk authored and abhishekbhakat committed Mar 5, 2024
1 parent 19d43b9 commit f1ddb5a
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 15 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ jobs:
run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*"
- uses: actions/checkout@v4
with:
# Need to fetch all history for selective checks tests
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@v5
with:
Expand Down
2 changes: 1 addition & 1 deletion dev/breeze/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT.

---------------------------------------------------------------------------------------------------------

Package config hash: 0a4cc814e27e822622708d862952a5b411a1a4ad8f3bca8fa591f39ed670ab6636de3caf7c1d072896c411c7eef824f887202b8de8729b8622f2af9b84a154b3
Package config hash: e2db123fd25e40b515520fb9f6ed32a601fe85e6a22b9845343bc5c81e5785b472527ed3bf6d07521c512d2222f99877a1ce1911ac504a3a6af7b7866aa674cc

---------------------------------------------------------------------------------------------------------
1 change: 1 addition & 0 deletions dev/breeze/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ dependencies = [
"rich>=13.6.0",
"semver>=3.0.2",
"tabulate>=0.9.0",
"tomli>=2.0.1; python_version < '3.11'",
"twine>=4.0.2",
]

Expand Down
132 changes: 123 additions & 9 deletions dev/breeze/src/airflow_breeze/utils/selective_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
from __future__ import annotations

import difflib
import json
import os
import re
Expand Down Expand Up @@ -59,6 +60,7 @@
TESTS_PROVIDERS_ROOT,
)
from airflow_breeze.utils.provider_dependencies import DEPENDENCIES, get_related_providers
from airflow_breeze.utils.run_utils import run_command

FULL_TESTS_NEEDED_LABEL = "full tests needed"
DEBUG_CI_RESOURCES_LABEL = "debug ci resources"
Expand All @@ -82,7 +84,7 @@ class FileGroupForCi(Enum):
API_TEST_FILES = "api_test_files"
API_CODEGEN_FILES = "api_codegen_files"
HELM_FILES = "helm_files"
SETUP_FILES = "setup_files"
DEPENDENCY_FILES = "dependency_files"
DOC_FILES = "doc_files"
WWW_FILES = "www_files"
SYSTEM_TEST_FILES = "system_tests"
Expand Down Expand Up @@ -112,7 +114,6 @@ def __hash__(self):
r"^dev/.*\.py$",
r"^Dockerfile",
r"^scripts",
r"^pyproject.toml",
r"^generated/provider_dependencies.json$",
],
FileGroupForCi.PYTHON_PRODUCTION_FILES: [
Expand All @@ -137,8 +138,7 @@ def __hash__(self):
r"^tests/kubernetes",
r"^helm_tests",
],
FileGroupForCi.SETUP_FILES: [
r"^pyproject.toml",
FileGroupForCi.DEPENDENCY_FILES: [
r"^generated/provider_dependencies.json$",
],
FileGroupForCi.DOC_FILES: [
Expand Down Expand Up @@ -370,6 +370,8 @@ def __init__(
self._github_repository = github_repository
self._github_actor = github_actor
self._github_context_dict = github_context_dict or {}
self._new_toml: dict[str, Any] = {}
self._old_toml: dict[str, Any] = {}

def __important_attributes(self) -> tuple[Any, ...]:
return tuple(getattr(self, f) for f in self.__HASHABLE_FIELDS)
Expand Down Expand Up @@ -422,6 +424,13 @@ def full_tests_needed(self) -> bool:
):
get_console().print("[warning]Running everything because env files changed[/]")
return True
if self.pyproject_toml_changed:
if self.build_system_changed_in_pyproject_toml:
get_console().print("[warning]Running everything: build-system changed in pyproject.toml[/]")
return True
if self.dependencies_changed_in_pyproject_toml:
get_console().print("[warning]Running everything: dependencies changed in pyproject.toml[/]")
return True
if FULL_TESTS_NEEDED_LABEL in self._pr_labels:
get_console().print(
"[warning]Full tests needed because "
Expand Down Expand Up @@ -786,18 +795,123 @@ def parallel_test_types_list_as_string(self) -> str | None:
def basic_checks_only(self) -> bool:
return not self.ci_image_build

@staticmethod
def _print_diff(old_lines: list[str], new_lines: list[str]):
diff = "\n".join([line for line in difflib.ndiff(old_lines, new_lines) if line and line[0] in "+-?"])
get_console().print(diff)

@cached_property
def pyproject_toml_changed(self) -> bool:
if not self._commit_ref:
get_console().print("[warning]Cannot determine pyproject.toml changes as commit is missing[/]")
return False
new_result = run_command(
["git", "show", f"{self._commit_ref}:pyproject.toml"],
capture_output=True,
text=True,
cwd=AIRFLOW_SOURCES_ROOT,
check=False,
)
if new_result.returncode != 0:
get_console().print(
f"[warning]Cannot determine pyproject.toml changes. "
f"Could not get pyproject.toml from {self._commit_ref}[/]"
)
return False
old_result = run_command(
["git", "show", f"{self._commit_ref}^:pyproject.toml"],
capture_output=True,
text=True,
cwd=AIRFLOW_SOURCES_ROOT,
check=False,
)
if old_result.returncode != 0:
get_console().print(
f"[warning]Cannot determine pyproject.toml changes. "
f"Could not get pyproject.toml from {self._commit_ref}^[/]"
)
return False
try:
import tomllib
except ImportError:
import tomli as tomllib

self._new_toml = tomllib.loads(new_result.stdout)
self._old_toml = tomllib.loads(old_result.stdout)
return True

@cached_property
def build_system_changed_in_pyproject_toml(self) -> bool:
if not self.pyproject_toml_changed:
return False
new_build_backend = self._new_toml["build-system"]["build-backend"]
old_build_backend = self._old_toml["build-system"]["build-backend"]
if new_build_backend != old_build_backend:
get_console().print("[warning]Build backend changed in pyproject.toml [/]")
self._print_diff([old_build_backend], [new_build_backend])
return True
new_requires = self._new_toml["build-system"]["requires"]
old_requires = self._old_toml["build-system"]["requires"]
if new_requires != old_requires:
get_console().print("[warning]Build system changed in pyproject.toml [/]")
self._print_diff(old_requires, new_requires)
return True
return False

@cached_property
def dependencies_changed_in_pyproject_toml(self) -> bool:
if not self.pyproject_toml_changed:
return False
new_deps = self._new_toml["project"]["dependencies"]
old_deps = self._old_toml["project"]["dependencies"]
if new_deps != old_deps:
get_console().print("[warning]Project dependencies changed [/]")
self._print_diff(old_deps, new_deps)
return True
new_optional_deps = self._new_toml["project"].get("optional-dependencies", {})
old_optional_deps = self._old_toml["project"].get("optional-dependencies", {})
if new_optional_deps != old_optional_deps:
get_console().print("[warning]Optional dependencies changed [/]")
all_dep_keys = set(new_optional_deps.keys()).union(old_optional_deps.keys())
for dep in all_dep_keys:
if new_optional_deps.get(dep) != old_optional_deps.get(dep):
get_console().print(f"[warning]Optional dependency {dep} changed[/]")
self._print_diff(old_optional_deps.get(dep, []), new_optional_deps.get(dep, []))
return True
return False

@cached_property
def upgrade_to_newer_dependencies(self) -> bool:
return (
if (
len(
self._matching_files(
FileGroupForCi.SETUP_FILES, CI_FILE_GROUP_MATCHES, CI_FILE_GROUP_EXCLUDES
FileGroupForCi.DEPENDENCY_FILES, CI_FILE_GROUP_MATCHES, CI_FILE_GROUP_EXCLUDES
)
)
> 0
or self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE]
or UPGRADE_TO_NEWER_DEPENDENCIES_LABEL in self._pr_labels
)
):
get_console().print("[warning]Upgrade to newer dependencies: Dependency files changed[/]")
return True
if self.dependencies_changed_in_pyproject_toml:
get_console().print(
"[warning]Upgrade to newer dependencies: Dependencies changed in pyproject.toml[/]"
)
return True
if self.build_system_changed_in_pyproject_toml:
get_console().print(
"[warning]Upgrade to newer dependencies: Build system changed in pyproject.toml[/]"
)
return True
if self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE]:
get_console().print("[warning]Upgrade to newer dependencies: Push or Schedule event[/]")
return True
if UPGRADE_TO_NEWER_DEPENDENCIES_LABEL in self._pr_labels:
get_console().print(
f"[warning]Upgrade to newer dependencies: Label '{UPGRADE_TO_NEWER_DEPENDENCIES_LABEL}' "
f"in {self._pr_labels}[/]"
)
return True
return False

@cached_property
def docs_list_as_string(self) -> str | None:
Expand Down
84 changes: 79 additions & 5 deletions dev/breeze/tests/test_selective_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
),
(
pytest.param(
("pyproject.toml",),
("generated/provider_dependencies.json",),
{
"affected-providers-list-as-string": ALL_PROVIDERS_AFFECTED,
"all-python-versions": "['3.8', '3.9', '3.10', '3.11']",
Expand Down Expand Up @@ -595,6 +595,53 @@ def test_expected_output_pull_request_main(
assert_outputs_are_printed(expected_outputs, str(stderr))


@pytest.mark.parametrize(
"files, commit_ref, expected_outputs",
[
(
pytest.param(
("pyproject.toml",),
"2bc8e175b3a4cc84fe33e687f1a00d2a49563090",
{
"full-tests-needed": "false",
},
id="No full tests needed when pyproject.toml changes in insignificant way",
)
),
(
pytest.param(
("pyproject.toml",),
"90e2b12d6b99d2f7db43e45f5e8b97d3b8a43b36",
{
"full-tests-needed": "true",
},
id="Full tests needed when only dependencies change in pyproject.toml",
)
),
(
pytest.param(
("pyproject.toml",),
"c381fdaff42bbda480eee70fb15c5b26a2a3a77d",
{
"full-tests-needed": "true",
},
id="Full tests needed when build-system changes in pyproject.toml",
)
),
],
)
def test_full_test_needed_when_pyproject_toml_changes(
files: tuple[str, ...], commit_ref: str, expected_outputs: dict[str, str]
):
stderr = SelectiveChecks(
files=files,
github_event=GithubEvents.PULL_REQUEST,
commit_ref=commit_ref,
default_branch="main",
)
assert_outputs_are_printed(expected_outputs, str(stderr))


@pytest.mark.parametrize(
"files, pr_labels, default_branch, expected_outputs,",
[
Expand Down Expand Up @@ -1149,30 +1196,52 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):


@pytest.mark.parametrize(
"files, expected_outputs, pr_labels",
"files, expected_outputs, pr_labels, commit_ref",
[
pytest.param(
("airflow/models/dag.py",),
{
"upgrade-to-newer-dependencies": "false",
},
(),
None,
id="Regular source changed",
),
pytest.param(
("pyproject.toml",),
{
"upgrade-to-newer-dependencies": "false",
},
(),
# In this commit only ruff configuration changed
"2bc8e175b3a4cc84fe33e687f1a00d2a49563090",
id="pyproject.toml changed but no dependency change",
),
pytest.param(
("pyproject.toml",),
{
"upgrade-to-newer-dependencies": "true",
},
(),
"90e2b12d6b99d2f7db43e45f5e8b97d3b8a43b36",
id="pyproject.toml changed with optional dependencies changed",
),
pytest.param(
("pyproject.toml",),
{
"upgrade-to-newer-dependencies": "true",
},
(),
id="pyproject.toml changed",
"74baebe5e774ac575fe3a49291996473b1daa789",
id="pyproject.toml changed with core dependencies changed",
),
pytest.param(
("airflow/providers/microsoft/azure/provider.yaml",),
{
"upgrade-to-newer-dependencies": "false",
},
(),
None,
id="Provider.yaml changed",
),
pytest.param(
Expand All @@ -1181,6 +1250,7 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
"upgrade-to-newer-dependencies": "true",
},
(),
"None",
id="Generated provider_dependencies changed",
),
pytest.param(
Expand All @@ -1189,16 +1259,20 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
"upgrade-to-newer-dependencies": "true",
},
("upgrade to newer dependencies",),
None,
id="Regular source changed",
),
],
)
def test_upgrade_to_newer_dependencies(
files: tuple[str, ...], expected_outputs: dict[str, str], pr_labels: tuple[str, ...]
files: tuple[str, ...],
expected_outputs: dict[str, str],
pr_labels: tuple[str, ...],
commit_ref: str | None,
):
stderr = SelectiveChecks(
files=files,
commit_ref="HEAD",
commit_ref=commit_ref,
github_event=GithubEvents.PULL_REQUEST,
default_branch="main",
pr_labels=pr_labels,
Expand Down

0 comments on commit f1ddb5a

Please sign in to comment.