Limit PR checks to build only the modified images (opendatahub-io#558)
(cherry picked from commit 7bfbf32)
jiridanek authored and jstourac committed Jul 19, 2024
1 parent c2382f7 commit 3f059f0
Showing 4 changed files with 185 additions and 5 deletions.
20 changes: 16 additions & 4 deletions .github/workflows/build-notebooks-pr.yaml
@@ -1,28 +1,40 @@
---
"name": "Build Notebooks"
"permissions":
  "packages": "read"
"on":
  "pull_request":

permissions:
  contents: read
  packages: read
  pull-requests: read

jobs:
  gen:
    name: Generate job matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.gen.outputs.matrix }}
      has_jobs: ${{ steps.gen.outputs.has_jobs }}
    steps:
      - uses: actions/checkout@v4
      - run: python3 ci/cached-builds/gen_gha_matrix_jobs.py

      - run: |
          python3 ci/cached-builds/gen_gha_matrix_jobs.py \
            --owner=${{ github.repository_owner }} \
            --repo=${{ github.event.pull_request.base.repo.name }} \
            --pr-number=${{ github.event.pull_request.number }} \
            --skip-unchanged
        id: gen
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # base images
  build:
    needs: ["gen"]
    strategy:
      fail-fast: false
      matrix: "${{ fromJson(needs.gen.outputs.matrix) }}"
    uses: ./.github/workflows/build-notebooks-TEMPLATE.yaml
    if: ${{ fromJson(needs.gen.outputs.has_jobs) }}
    with:
      target: "${{ matrix.target }}"
      github: "${{ toJSON(github) }}"
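Note on how these outputs are consumed: the gen step prints key=value lines that GitHub Actions exposes as steps.gen.outputs.*, which the fromJson() calls in the matrix: and if: expressions above then parse. Based on print_github_actions_pr_matrix further down, the output for a PR touching a single image would look roughly like this (target name borrowed from the self-tests, for illustration only; the actual write to $GITHUB_OUTPUT happens in a part of main() not shown in this diff):

matrix={"target":["jupyter-trustyai-ubi9-python-3.9"]}
has_jobs=true

When no image directory is touched, has_jobs comes out false and the if: condition skips the whole build matrix.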
1 change: 1 addition & 0 deletions Makefile
@@ -59,6 +59,7 @@ endef
# ARG 2: Path of image context we want to build.
# ARG 3: Base image tag name (optional).
define image
	$(info #*# Image build directory: <$(2)> #(MACHINE-PARSED LINE)#*#...)
	$(call build_image,$(1),$(2),$(3))
	$(call push_image,$(1))
endef
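This $(info ...) line is the hand-off point between the Makefile and the new Python helper: when a target is expanded with make <target> --just-print, every $(call image,...) in its dependency chain prints one marker line, for example (directory taken from the self-tests below):

#*# Image build directory: <base/ubi9-python-3.9> #(MACHINE-PARSED LINE)#*#...

analyze_build_directories() in ci/cached-builds/gha_pr_changed_files.py matches these lines with a regex to learn which directories a given image target is built from.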
43 changes: 42 additions & 1 deletion ci/cached-builds/gen_gha_matrix_jobs.py
@@ -1,11 +1,17 @@
import argparse
import itertools
import json
import logging
import os
import pathlib
import re
import string
import sys
import unittest
from typing import Iterable

import gha_pr_changed_files

"""Trivial Makefile parser that extracts target dependencies so that we can build each Dockerfile image target in its
own GitHub Actions job and handle dependencies between them.
@@ -115,11 +121,13 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) -
def flatten(list_of_lists):
    return list(itertools.chain.from_iterable(list_of_lists))


def compute_leafs_in_dependency_tree(tree: dict[str, list[str]]) -> list[str]:
    key_set = set(tree.keys())
    value_set = set(flatten(tree.values()))
    return [key for key in key_set if key not in value_set]


def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str]) -> list[str]:
    """Outputs GitHub matrix definition Json
    """
@@ -136,10 +144,24 @@ def print_github_actions_pr_matrix(tree: dict[str, list[str]], leafs: list[str])
        targets.append(leaf)

    matrix = {"target": targets}
    return [f"matrix={json.dumps(matrix, separators=(',', ':'))}"]
    return [f"matrix={json.dumps(matrix, separators=(',', ':'))}",
            f"has_jobs={json.dumps(len(leafs) > 0, separators=(',', ':'))}"]


def main() -> None:
    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

    argparser = argparse.ArgumentParser()
    argparser.add_argument("--owner", type=str, required=False,
                           help="GitHub repo owner/org (for the --skip-unchanged feature)")
    argparser.add_argument("--repo", type=str, required=False,
                           help="GitHub repo name (for the --skip-unchanged feature)")
    argparser.add_argument("--pr-number", type=int, required=False,
                           help="PR number under owner/repo (for the --skip-unchanged feature)")
    argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False,
                           action=argparse.BooleanOptionalAction)
    args = argparser.parse_args()

    # https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
    with open("Makefile", "rt") as makefile:
        lines = read_makefile_lines(makefile)
@@ -148,6 +170,10 @@ def main() -> None:
    write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")

    leafs = compute_leafs_in_dependency_tree(tree)
    if args.skip_unchanged:
        logging.info(f"Skipping targets not modified in PR #{args.pr_number}")
        changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number)
        leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
    output = print_github_actions_pr_matrix(tree, leafs)

    print("leafs", leafs)
@@ -159,3 +185,18 @@

if __name__ == '__main__':
    main()


class SelfTests(unittest.TestCase):
    def test_select_changed_targets(self):
        with open(project_dir / "Makefile", "rt") as makefile:
            lines = read_makefile_lines(makefile)
        tree = extract_target_dependencies(lines)
        leafs = compute_leafs_in_dependency_tree(tree)

        changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"]

        leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
        assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9',
                              'jupyter-trustyai-ubi9-python-3.9',
                              'jupyter-pytorch-ubi9-python-3.9'}
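The same entry point can be exercised locally. A minimal sketch, assuming a valid token and an existing PR such as #556 used by the self-tests:

GITHUB_TOKEN=<token> python3 ci/cached-builds/gen_gha_matrix_jobs.py \
    --owner=opendatahub-io \
    --repo=notebooks \
    --pr-number=556 \
    --skip-unchanged

Without --skip-unchanged the script behaves as before: it regenerates .github/workflows/build-notebooks.yaml and emits the full target matrix, so other callers are unaffected.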
126 changes: 126 additions & 0 deletions ci/cached-builds/gha_pr_changed_files.py
@@ -0,0 +1,126 @@
import json
import logging
import os
import pathlib
import re
import subprocess
import unittest
import urllib.request

PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve()


def get_github_token() -> str:
    github_token = os.environ['GITHUB_TOKEN']
    return github_token


# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql
def compose_gh_api_request(pull_number: int, owner="opendatahub-io", repo="notebooks", per_page=100,
                           cursor="") -> urllib.request.Request:
    github_token = get_github_token()

    return urllib.request.Request(
        url="https://api.github.com/graphql",
        method="POST",
        headers={
            "Authorization": f"bearer {github_token}",
        },
        # https://docs.github.com/en/graphql/guides/using-the-explorer
        data=json.dumps({"query": f"""
        {{
          repository(owner:"{owner}", name:"{repo}") {{
            pullRequest(number:{pull_number}) {{
              files(first:{per_page}, after:"{cursor}") {{
                edges {{
                  node {{
                    path
                  }}
                  cursor
                }}
              }}
            }}
          }}
        }}
        """}).encode("utf-8"),
    )


def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]:
    files = []

    logging.debug("Getting list of changed files from GitHub API")

    CURSOR = ""
    while CURSOR is not None:
        request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page,
                                         cursor=CURSOR)
        response = urllib.request.urlopen(request)
        data = json.loads(response.read().decode("utf-8"))
        response.close()
        edges = data["data"]["repository"]["pullRequest"]["files"]["edges"]

        CURSOR = None
        for edge in edges:
            files.append(edge["node"]["path"])
            CURSOR = edge["cursor"]

    logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)")
    return files


def analyze_build_directories(make_target) -> list[str]:
    directories = []

    pattern = re.compile(r"#\*# Image build directory: <(?P<dir>[^>]+)> #\(MACHINE-PARSED LINE\)#\*#\.\.\.")
    try:
        logging.debug(f"Running make in --just-print mode for target {make_target}")
        for line in subprocess.check_output(["make", make_target, "--just-print"], encoding="utf-8",
                                            cwd=PROJECT_ROOT).splitlines():
            if m := pattern.match(line):
                directories.append(m["dir"])
    except subprocess.CalledProcessError as e:
        print(e.stderr, e.stdout)
        raise

    logging.debug(f"Target {make_target} depends on files in directories {directories}")
    return directories


def should_build_target(changed_files: list[str], target_directories: list[str]) -> str:
    """Returns truthy if there is at least one changed file necessitating a build.
    Falsy (empty) string is returned otherwise."""
    for directory in target_directories:
        for changed_file in changed_files:
            if changed_file.startswith(directory):
                return changed_file
    return ""


def filter_out_unchanged(targets: list[str], changed_files: list[str]) -> list[str]:
    changed = []
    for target in targets:
        target_directories = analyze_build_directories(target)
        if reason := should_build_target(changed_files, target_directories):
            logging.info(f"✅ Will build {target} because file {reason} has been changed")
            changed.append(target)
        else:
            logging.info(f"❌ Won't build {target}")
    return changed


class SelfTests(unittest.TestCase):
    def test_compose_gh_api_request__call_without_asserting(self):
        request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
        print(request.data)

    def test_list_changed_files__pagination_works(self):
        changed_files = list_changed_files(owner="opendatahub-io", repo="notebooks", pr_number=556, per_page=1)
        assert set(changed_files) == {'codeserver/ubi9-python-3.9/Dockerfile',
                                      'codeserver/ubi9-python-3.9/run-code-server.sh'}

    def test_analyze_build_directories(self):
        directories = analyze_build_directories("jupyter-intel-pytorch-ubi9-python-3.9")
        assert set(directories) == {"base/ubi9-python-3.9",
                                    "intel/base/gpu/ubi9-python-3.9",
                                    "jupyter/intel/pytorch/ubi9-python-3.9"}

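The pagination loop in list_changed_files() keeps requesting pages until one comes back with no edges, feeding the cursor of the last edge into the after: argument of the next query. The response it walks has roughly this shape (paths taken from the PR #556 test above, cursor values purely illustrative):

{
  "data": {
    "repository": {
      "pullRequest": {
        "files": {
          "edges": [
            {"node": {"path": "codeserver/ubi9-python-3.9/Dockerfile"}, "cursor": "MQ"},
            {"node": {"path": "codeserver/ubi9-python-3.9/run-code-server.sh"}, "cursor": "Mg"}
          ]
        }
      }
    }
  }
}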