From 6a6df84b2a3d3369c829fc4d08fe36e8e43fc4f1 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 2 Mar 2020 00:27:27 +0100 Subject: [PATCH] [AIRFLOW-5828] Move build logic out from hooks/build This is the final step of simplifying the Breeze scripts by moving all the logic out from Travis' hooks/build --- BREEZE.rst | 4 +- breeze | 61 ++- confirm | 6 +- hooks/build | 396 +----------------- scripts/ci/_utils.sh | 385 ++++++++++++++--- scripts/ci/ci_build_dockerhub.sh | 64 +++ scripts/ci/ci_check_license.sh | 2 + scripts/ci/ci_docs.sh | 2 + scripts/ci/ci_flake8.sh | 1 + scripts/ci/ci_mypy.sh | 6 +- scripts/ci/ci_prepare_backport_packages.sh | 21 +- scripts/ci/ci_pylint_main.sh | 4 +- scripts/ci/ci_pylint_tests.sh | 1 + scripts/ci/ci_refresh_pylint_todo.sh | 2 + scripts/ci/ci_run_airflow_testing.sh | 3 +- scripts/ci/ci_run_all_static_checks.sh | 6 +- .../ci/ci_run_static_checks_pylint_tests.sh | 6 +- .../ci/in_container/_in_container_utils.sh | 81 +--- scripts/ci/in_container/entrypoint_ci.sh | 9 +- scripts/ci/in_container/run_flake8.sh | 10 - scripts/ci/in_container/run_mypy.sh | 6 - scripts/ci/in_container/run_pylint_main.sh | 4 - scripts/ci/in_container/run_pylint_tests.sh | 3 - scripts/ci/pre_commit_ci_build.sh | 4 +- scripts/ci/pre_commit_setup_cfg_file.sh | 1 - 25 files changed, 472 insertions(+), 616 deletions(-) create mode 100755 scripts/ci/ci_build_dockerhub.sh diff --git a/BREEZE.rst b/BREEZE.rst index 667d2647ef7ed..c3d205b226fd0 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -394,7 +394,7 @@ Mounting Local Sources to Breeze Important sources of Airflow are mounted inside the ``airflow-testing`` container that you enter. This means that you can continue editing your changes on the host in your favourite IDE and have them visible in the Docker immediately and ready to test without rebuilding images. You can disable mounting -by specifying ``--skip-mounting-source-volume`` flag when running Breeze. 
In this case you will have sources +by specifying ``--skip-mounting-local-sources`` flag when running Breeze. In this case you will have sources embedded in the container and changes to these sources will not be persistent. @@ -877,7 +877,7 @@ This is the current syntax for `./breeze <./breeze>`_: Manage mounting local files **************************************************************************************************** - -l, --skip-mounting-source-volume + -l, --skip-mounting-local-sources Skips mounting local volume with sources - you get exactly what is in the docker image rather than your current local sources of airflow. diff --git a/breeze b/breeze index 4c8e504abc72a..e1fcc4cb664d7 100755 --- a/breeze +++ b/breeze @@ -84,7 +84,7 @@ function setup_default_breeze_variables() { . "${MY_DIR}/breeze-complete" # Skips mounting local Airflow sources - SKIP_MOUNTING_LOCAL_SOURCES="false" + MOUNT_LOCAL_SOURCES="true" # Holds last subcommand used LAST_SUBCOMMAND="" @@ -114,9 +114,6 @@ function setup_default_breeze_variables() { # Do not enable Kind Kubernetes cluster by default export ENABLE_KIND_CLUSTER="false" - # We use docker image caches by default to speed up the builds - export USE_PULLED_IMAGES_AS_CACHE=${USE_PULLED_IMAGES_AS_CACHE:="true"} - # By default we do not push images. This can be overridden by -u flag. 
export PUSH_IMAGES=${PUSH_IMAGES:="false"} @@ -128,15 +125,9 @@ function setup_default_breeze_variables() { export INSTALL_AIRFLOW_VERSION=${INSTALL_AIRFLOW_VERSION:="current"} # Determine version of the Airflow from version.py - AIRFLOW_VERSION=$(cat airflow/version.py - << EOF | python -print(version.replace("+","")) -EOF - ) + AIRFLOW_VERSION=$(grep version "airflow/version.py" | awk '{print $3}' | sed "s/['+]//g") export AIRFLOW_VERSION - # Verbosity in running ci scripts - export VERBOSE=${VERBOSE:="false"} - # Whether to force build without checking if it is needed export FORCE_BUILD_IMAGES=${FORCE_BUILD_IMAGES:="false"} @@ -282,7 +273,7 @@ EOF } function print_badge { - if [[ ! -f "${SUPPRESS_ASCIIART_FILE}" ]]; then + if [[ ! -f "${SUPPRESS_ASCIIART_FILE}" && ${COMMAND_TO_RUN} == "enter_breeze" ]]; then cat < /dev/null; then + if [[ ${COMMAND_TO_RUN} == "enter_breeze" ]] ; then + # shellcheck disable=SC2034 # Unused variables left for comp_breeze usage + if ! typeset -f "_comp_breeze" > /dev/null; then + print_line + echo + echo " You can setup autocomplete by running '${CMDNAME} setup-autocomplete'" + echo + echo + fi + print_line + echo + echo " You can toggle ascii/cheatsheet by running:" + echo " * ${CMDNAME} toggle-suppress-cheatsheet" + echo " * ${CMDNAME} toggle-suppress-asciiart" + echo print_line echo - echo " You can setup autocomplete by running '${CMDNAME} setup-autocomplete'" + echo echo echo fi - print_line - echo - echo " You can toggle ascii/cheatsheet by running:" - echo " * ${CMDNAME} toggle-suppress-cheatsheet" - echo " * ${CMDNAME} toggle-suppress-asciiart" - echo - print_line - echo - echo - echo - echo } function make_sure_precommit_is_installed { @@ -1420,6 +1412,7 @@ function run_static_checks { } function run_build_command { + prepare_build case "${COMMAND_TO_RUN}" in enter_breeze|build_docs|run_tests|run_docker_compose|run_in_bash) rebuild_ci_image_if_needed diff --git a/confirm b/confirm index 645d54aab901f..019219d407c48 100755 
--- a/confirm +++ b/confirm @@ -31,13 +31,13 @@ if [[ "${FORCE_ANSWER_TO_QUESTIONS:=""}" != "" ]]; then esac else echo - echo "Please confirm ${1} images. Are you sure? [y/N/q]" + echo "Please confirm ${1}. Are you sure? [y/N/q]" read -r RESPONSE fi case "${RESPONSE}" in [yY][eE][sS]|[yY]) - echo "The answer is 'yes'. Attempting to ${1} images. This can take some time !" + echo "The answer is 'yes'. ${1}. This can take some time !" exit 0 ;; [qQ][uU][iI][tT]|[qQ]) @@ -45,7 +45,7 @@ case "${RESPONSE}" in exit 2 ;; *) - echo "The answer is 'no'. Skipping attempt to ${1} images." + echo "The answer is 'no'. Skipping ${1}." exit 1 ;; esac diff --git a/hooks/build b/hooks/build index 5e572ea0f715e..24b7e3c70df8d 100755 --- a/hooks/build +++ b/hooks/build @@ -20,400 +20,6 @@ # on Travis CI to potentially rebuild (and refresh layers that # are not cached) Docker images that are used to run CI jobs -set -euo pipefail - MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -echo "My dir: ${MY_DIR}" - -AIRFLOW_SOURCES=$(cd "${MY_DIR}/.." 
|| exit 1; pwd) -cd "${AIRFLOW_SOURCES}" - -# shellcheck source=common/_default_branch.sh -source "${AIRFLOW_SOURCES}/common/_default_branch.sh" -# shellcheck source=common/_files_for_rebuild_check.sh -source "${AIRFLOW_SOURCES}/common/_files_for_rebuild_check.sh" - -echo -echo "Airflow root directory: ${AIRFLOW_SOURCES}" -echo - -BUILD_CACHE_DIR="${AIRFLOW_SOURCES}/.build" -mkdir -pv "${BUILD_CACHE_DIR}" - -date - -BUILD_START_TIME=$(date +%s) -LAST_STEP_START_TIME=${BUILD_START_TIME} -LAST_STEP_NAME="" -STEP_STARTED="false" -PYTHON_VERSION_FOR_DEFAULT_IMAGE=3.6 -VERBOSE=${VERBOSE:="false"} -VERBOSE_COMMANDS=${VERBOSE_COMMANDS:="false"} - -if [[ ${VERBOSE_COMMANDS} == "true" ]]; then - set -x -fi - -function end_step { - if [[ "${STEP_STARTED}" != "true" ]]; then - return - fi - LAST_STEP_END_TIME=$(date +%s) - echo - echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" - echo " Finishing step: ${LAST_STEP_NAME}" - echo " Date: $(date)" - echo " Step time in s : $((LAST_STEP_END_TIME-LAST_STEP_START_TIME))" - echo " Total time in s: $((LAST_STEP_END_TIME-BUILD_START_TIME))" - echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" - echo - STEP_STARTED="false" -} - -function start_step { - end_step - LAST_STEP_NAME="${1}" - LAST_STEP_START_TIME=$(date +%s) - STEP_STARTED="true" - echo - echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - echo " Starting step: ${LAST_STEP_NAME}" - echo " Date: $(date)" - echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - echo -} - -function add_image_to_push { - IMAGE=$1 - IMAGES_BUILT="${IMAGES_BUILT} ${IMAGE}" - echo - echo "Adding TAG ${IMAGE} to push" - echo - echo - echo "List of tags to push now: '${IMAGES_BUILT}'" - echo -} - -function build_python_image { - NAME="${1}" - MY_IMAGE_TAG="${2}" - TARGET_IMAGE="${3}" - AIRFLOW_EXTRAS="${4:-all}" - 
HOME="${6:-/home/airflow}" - - echo "Build ${NAME} image: ${MY_IMAGE_TAG}" - echo "Python base image: ${PYTHON_BASE_IMAGE}" - - set +u - set -x - docker build \ - --build-arg PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE}" \ - --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \ - --build-arg AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS}" \ - --build-arg AIRFLOW_BRANCH="${BRANCH_NAME}" \ - --build-arg AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" \ - --build-arg HOME="${HOME}" \ - "${DOCKER_CACHE_DIRECTIVE_CI[@]}" \ - -t "${MY_IMAGE_TAG}" \ - --target "${TARGET_IMAGE}" \ - . - - add_image_to_push "${MY_IMAGE_TAG}" - set +x - set -u -} - -start_step "Setting variables" - -# shellcheck source=common/_autodetect_variables.sh -. "${AIRFLOW_SOURCES}/common/_autodetect_variables.sh" - -echo -echo "Travis event type: ${TRAVIS_EVENT_TYPE:=}" -echo - -# In case of CRON jobs on Travis we run builds without cache -if [[ "${TRAVIS_EVENT_TYPE:=}" == "cron" ]]; then - echo - echo "Disabling cache for CRON jobs" - echo - USE_NO_CACHE=${USE_NO_CACHE:="true"} -fi - -# You can set USE_NO_CACHE to true if you want to use standard Docker cache during build -# This way you can test building everything from the scratch -USE_NO_CACHE=${USE_NO_CACHE:="false"} - -# If cache is not used, there is no point in pulling images for cache -if [[ "${USE_NO_CACHE:=}" == "true" ]]; then - echo - echo "Pulling images is disabled because cache is not used" - echo - USE_PULLED_IMAGES_AS_CACHE="false" -fi - - -# You can set USE_PULLED_IMAGES_AS_CACHE to false if you do not want to use pulled images -# as cache during build -# This way you can test building from the scratch -USE_PULLED_IMAGES_AS_CACHE=${USE_PULLED_IMAGES_AS_CACHE:="true"} - -pwd -# Determine version of the Airflow from version.py -AIRFLOW_VERSION=$(cat airflow/version.py - << EOF | python -print(version.replace("+","")) -EOF -) -export AIRFLOW_VERSION - -# Check if we are running in the CI environment -CI=${CI:="false"} - -if 
[[ "${CI}" == "true" ]]; then - NON_CI="false" -else - NON_CI="true" -fi - -# Extras used to build cache and CI image -AIRFLOW_CI_EXTRAS=${AIRFLOW_CI_EXTRAS:="devel_ci"} - -# Whether this is a release build -AIRFLOW_RELEASE_BUILD=${AIRFLOW_RELEASE_BUILD:="false"} - -echo -echo "Airflow ${AIRFLOW_VERSION} Python: ${PYTHON_VERSION}." -echo - -# Whether to push images after build -# This is set to false on CI builds -export PUSH_IMAGES=${PUSH_IMAGES:=${NON_CI}} - -# Whether to force pull images to populate cache -export FORCE_PULL_IMAGES=${FORCE_PULL_IMAGES:="false"} - -# In CI environment (and local force) we skip pulling latest python image -export SKIP_PULLING_LATEST_PYTHON_IMAGE=${SKIP_PULLING_LATEST_PYTHON_IMAGE:=${CI}} - -export AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD=${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD:="true"} -echo "The build optimised for CI: ${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" - -# Base python image for the build -export PYTHON_BASE_IMAGE=python:${PYTHON_VERSION}-slim-stretch - -if [[ "${AIRFLOW_RELEASE_BUILD}" == "true" ]]; then - export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-python${PYTHON_VERSION}-ci" - export AIRFLOW_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-ci" - export AIRFLOW_CI_IMAGE_LATEST="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-python${PYTHON_VERSION}-${TAG_PREFIX}-ci" - export AIRFLOW_CI_IMAGE_LATEST_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-${TAG_PREFIX}-ci" -else - export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-python${PYTHON_VERSION}-ci" - export AIRFLOW_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-ci" -fi -# In the future we can enable buildkit. -# It's experimental now and cache does not work out of the box with buildkit in Docker 18.09.2, buildkit 0.3.3 -# It is fixed in upcoming buildkit 0.4.0. -# Buildkit will make build faster (including parallel builds of multi-stage builds). 
-# It will also help with simpler skipping of unused images. -export DOCKER_BUILDKIT=${DOCKER_BUILDKIT:=0} - -# List of images to push at the end of the build -IMAGES_BUILT="" - -end_step - -start_step "Populating cache" - -DOCKER_CACHE_DIRECTIVE_CI=() - -if [[ "${USE_PULLED_IMAGES_AS_CACHE}" == "true" ]]; then - echo - echo "Pulling images to populate cache" - echo - echo - if [[ "${FORCE_PULL_IMAGES}" == "true" ]]; then - if [[ ${SKIP_PULLING_LATEST_PYTHON_IMAGE} == "true" ]]; then - echo - echo "Skip force-pulling the base images." - echo - else - set -x - echo - echo "Force pull base python image." - echo - docker pull "${PYTHON_BASE_IMAGE}" - echo - set +x - fi - fi - IMAGES_TO_PULL="${AIRFLOW_CI_IMAGE}" - - DOCKER_CACHE_DIRECTIVE_CI=() - if [[ ${IMAGES_TO_PULL} == "" ]]; then - echo - echo "Skipping building of all images." - echo - else - for IMAGE in ${IMAGES_TO_PULL} - do - echo - echo "Checking whether image ${IMAGE} needs to be pulled." - echo - PULL_IMAGE="false" - if [[ "${FORCE_PULL_IMAGES}" == "true" ]]; then - echo - echo "Pulling images is forced. Pulling ${IMAGE}" - echo - PULL_IMAGE="true" - else - IMAGE_HASH=$(docker images -q "${IMAGE}" 2> /dev/null) - if [[ "${IMAGE_HASH}" == "" ]]; then - echo - echo "No image ${IMAGE} locally available. Pulling for the first time." - echo - PULL_IMAGE="true" - else - echo - echo "Image ${IMAGE} is in local registry (${IMAGE_HASH}). Not pulling it!" - echo - PULL_IMAGE="false" - fi - fi - if [[ "${PULL_IMAGE}" == "true" ]]; then - echo - set -x - docker pull "${IMAGE}" || true - set +x - echo - fi - if [[ "${IMAGE}" == "${AIRFLOW_CI_IMAGE}" ]]; then - DOCKER_CACHE_DIRECTIVE_CI+=("--cache-from" "${IMAGE}") - else - echo - echo "Don't know how to set cache directive for ${IMAGE}. Exiting" - echo - exit 1 - fi - done - fi -fi - -start_step "Setting cache options" - -if [[ "${USE_NO_CACHE}" == "true" ]]; then - DOCKER_CACHE_DIRECTIVE_CI+=("--no-cache") - echo - echo "Skip cache for builds. 
Everything will be rebuilt from scratch." - echo - echo "Cache directives used: " - echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" - echo -elif [[ "${USE_PULLED_IMAGES_AS_CACHE}" == "true" ]]; then - echo - echo "This build uses Docker cache from pulled images" - echo "Cache directives used: " - set +u - echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" - set -u - echo -else - DOCKER_CACHE_DIRECTIVE_CI=() - echo - echo "Use default cache from locally built images." - echo - echo "Cache directives used: " - set +u - echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" - set -u - echo -fi - -start_step "Creating deployment directory" - -STAT_BIN=stat -if [[ "${OSTYPE}" == "darwin"* ]]; then - STAT_BIN=gstat -fi - -# Build id identifying the build uniquely -BUILD_ID=${BUILD_ID:="local"} - - -# directory where "deployment" artifacts should be placed -DEPLOY_DIR="${AIRFLOW_SOURCES}/dist/${BRANCH_NAME}/$(date "+%Y-%m-%d")/${BUILD_ID}/${PYTHON_VERSION}" - -mkdir -pv "${DEPLOY_DIR}" - -# -# Fixing permissions for all important files that are going to be added to Docker context -# This is necessary, because there are different default umask settings on different *NIX -# In case of some systems (especially in the CI environments) there is default +w group permission -# set automatically via UMASK when git checkout is performed. -# https://unix.stackexchange.com/questions/315121/why-is-the-default-umask-002-or-022-in-many-unix-systems-seems-insecure-by-defa -# Unfortunately default setting in git is to use UMASK by default: -# https://git-scm.com/docs/git-config/1.6.3.1#git-config-coresharedRepository -# This messes around with Docker context invalidation because the same files have different permissions -# and effectively different hash used for context validation calculation. -# -# We fix it by removing write permissions for other/group for all files that are in the Docker context. 
-# -# Since we can't (easily) tell what dockerignore would restrict, we'll just to -# it to "all" files in the git repo, making sure to exclude the www/static/docs -# symlink which is broken until the docs are built. - -function filterout_deleted_files { - # Take NUL-separated stdin, return only files that exist on stdout NUL-separated - # This is to cope with users deleting files or folders locally but not doing `git rm` - xargs -0 "$STAT_BIN" --printf '%n\0' 2>/dev/null || true; -} - -# This deals with files -git ls-files -z -- ./ | filterout_deleted_files | xargs -0 chmod og-w -# and this deals with directories -git ls-tree -z -r -d --name-only HEAD | filterout_deleted_files | xargs -0 chmod og-w,og+x - -start_step "Build Airflow CI full image" -build_python_image "Airflow CI" \ - "${AIRFLOW_CI_IMAGE}" \ - "main" \ - "devel_ci" \ - "root" \ - "/root" - -if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then - docker tag "${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_DEFAULT}" - add_image_to_push "${AIRFLOW_CI_IMAGE_DEFAULT}" -fi -if [[ "${AIRFLOW_RELEASE_BUILD}" == "true" ]]; then - docker tag "${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_LATEST}" - add_image_to_push "${AIRFLOW_CI_IMAGE_LATEST}" - if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then - docker tag "${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_LATEST_DEFAULT}" - add_image_to_push "${AIRFLOW_CI_IMAGE_LATEST_DEFAULT}" - fi -fi - -start_step "Pushing images" - -if [[ "${PUSH_IMAGES}" != "false" ]]; then - echo - echo "Pushing images: ${IMAGES_BUILT}" - echo - for IMAGE in ${IMAGES_BUILT} - do - echo "Pushing image '${IMAGE}'" - docker push ${IMAGE} - done -else - echo - echo "Skip pushing images." 
- echo "Images built: ${IMAGES_BUILT}" - echo -fi - -end_step - -echo -echo "Build finished" -echo +exec "${MY_DIR}/../scripts/ci/ci_build_dockerhub.sh" diff --git a/scripts/ci/_utils.sh b/scripts/ci/_utils.sh index edca8cb169410..40f93d386ef48 100644 --- a/scripts/ci/_utils.sh +++ b/scripts/ci/_utils.sh @@ -30,6 +30,13 @@ function check_verbose_setup { } +function verbose_docker { + if [[ ${VERBOSE:="false"} == "true" ]]; then + echo "docker" "${@}" + fi + docker "${@}" +} + function initialize_breeze_environment { AIRFLOW_SOURCES=${AIRFLOW_SOURCES:=$(cd "${MY_DIR}/../../" && pwd)} export AIRFLOW_SOURCES @@ -39,9 +46,6 @@ function initialize_breeze_environment { LAST_FORCE_ANSWER_FILE="${BUILD_CACHE_DIR}/last_force_answer.sh" - IMAGES_TO_CHECK=("CI") - export IMAGES_TO_CHECK - # Create directories if needed mkdir -p "${AIRFLOW_SOURCES}/.mypy_cache" mkdir -p "${AIRFLOW_SOURCES}/logs" @@ -70,7 +74,7 @@ function initialize_breeze_environment { export PYTHONDONTWRITEBYTECODE=${PYTHONDONTWRITEBYTECODE:="true"} # By default we assume the kubernetes cluster is not being started - export ENABLE_KIND_CLUSTER=${ENABLE_KIND_CLUSTER:="false"} + export ENABLE_KIND_CLUSTER=${ENABLE_KIND_CLUSTER:="false"} # # Sets mounting of host volumes to container for static checks # unless MOUNT_HOST_AIRFLOW_VOLUME is not true @@ -121,6 +125,21 @@ function initialize_breeze_environment { fi export EXTRA_DOCKER_FLAGS + + # We use pulled docker image cache by default to speed up the builds + export DOCKER_CACHE=${DOCKER_CACHE:="pulled"} + + + STAT_BIN=stat + if [[ "${OSTYPE}" == "darwin"* ]]; then + STAT_BIN=gstat + fi + + AIRFLOW_VERSION=$(grep version "airflow/version.py" | awk '{print $3}' | sed "s/['+]//g") + export AIRFLOW_VERSION + + # default version for dockerhub images + export PYTHON_VERSION_FOR_DEFAULT_DOCKERHUB_IMAGE=3.6 } function print_info() { @@ -312,7 +331,6 @@ function check_if_docker_build_is_needed() { print_info print_info "Checking if docker image build is needed for 
${THE_IMAGE_TYPE} image." print_info - local IMAGE_BUILD_NEEDED="false" if [[ ${FORCE_BUILD_IMAGES:=""} == "true" ]]; then echo "Docker image build is forced for ${THE_IMAGE_TYPE} image" set +e @@ -322,7 +340,6 @@ function check_if_docker_build_is_needed() { check_file_md5sum "${AIRFLOW_SOURCES}/${FILE}" done set -e - IMAGES_TO_REBUILD+=("${THE_IMAGE_TYPE}") export NEEDS_DOCKER_BUILD="true" else set +e @@ -330,13 +347,10 @@ function check_if_docker_build_is_needed() { do if ! check_file_md5sum "${AIRFLOW_SOURCES}/${FILE}"; then export NEEDS_DOCKER_BUILD="true" - IMAGE_BUILD_NEEDED=true fi done set -e - if [[ ${IMAGE_BUILD_NEEDED} == "true" ]]; then - IMAGES_TO_REBUILD+=("${THE_IMAGE_TYPE}") - export NEEDS_DOCKER_BUILD="true" + if [[ ${NEEDS_DOCKER_BUILD} == "true" ]]; then echo "Docker image build is needed for ${THE_IMAGE_TYPE} image!" else print_info "Docker image build is not needed for ${THE_IMAGE_TYPE} image!" @@ -448,7 +462,6 @@ function forget_last_answer() { fi } - function confirm_image_rebuild() { if [[ -f "${LAST_FORCE_ANSWER_FILE}" ]]; then # set variable from last answered response given in the same pre-commit run - so that it can be @@ -456,29 +469,56 @@ function confirm_image_rebuild() { # shellcheck disable=SC1090 source "${LAST_FORCE_ANSWER_FILE}" fi - set +e + set e + local RES if [[ ${CI:="false"} == "true" ]]; then print_info - print_info "CI environment - forcing build for ${THE_IMAGE_TYPE} image." + print_info "CI environment - forcing rebuild for image ${THE_IMAGE_TYPE}." 
print_info RES="0" + elif [[ -n "${FORCE_ANSWER_TO_QUESTIONS:=""}" ]]; then + print_info + print_info "Forcing answer '${FORCE_ANSWER_TO_QUESTIONS}'" + print_info + case "${FORCE_ANSWER_TO_QUESTIONS}" in + [yY][eE][sS]|[yY]) + RES="0" ;; + [qQ][uU][iI][tT]|[qQ]) + RES="2" ;; + *) + RES="1" ;; + esac + elif [[ -t 0 ]]; then + # Check if this script is run interactively with stdin open and terminal attached + "${AIRFLOW_SOURCES}/confirm" "Rebuild image ${THE_IMAGE_TYPE} (might take some time)" + RES=$? + elif [[ ${DETECTED_TERMINAL:=$(tty)} != "not a tty" ]]; then + # Make sure to use output of tty rather than stdin/stdout when available - this way confirm + # will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks) + # shellcheck disable=SC2094 + "${AIRFLOW_SOURCES}/confirm" "Rebuild image ${THE_IMAGE_TYPE} (might take some time)" \ + <"${DETECTED_TERMINAL}" >"${DETECTED_TERMINAL}" + RES=$? + export DETECTED_TERMINAL elif [[ -c /dev/tty ]]; then + export DETECTED_TERMINAL=/dev/tty # Make sure to use /dev/tty first rather than stdin/stdout when available - this way confirm # will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks) - "${AIRFLOW_SOURCES}/confirm" "build ${THE_IMAGE_TYPE}" /dev/tty - RES=$? - elif [[ -t 0 ]]; then - # Check if this script is run interactively with stdin open and terminal attached - "${AIRFLOW_SOURCES}/confirm" "build ${THE_IMAGE_TYPE}" + # shellcheck disable=SC2094 + "${AIRFLOW_SOURCES}/confirm" "Rebuild image ${THE_IMAGE_TYPE} (might take some time)" \ + <"${DETECTED_TERMINAL}" >"${DETECTED_TERMINAL}" RES=$? else - # No terminal, no stdin - quitting! + print_info + print_info "No terminal, no stdin - quitting" + print_info + # No terminal, no stdin, no force answer - quitting! 
RES="2" fi set -e if [[ ${RES} == "1" ]]; then print_info - print_info "Skipping build for ${THE_IMAGE_TYPE}" + print_info "Skipping build for image ${THE_IMAGE_TYPE}" print_info SKIP_REBUILD="true" # Force "no" also to subsequent questions so that if you answer it once, you are not asked @@ -487,15 +527,12 @@ function confirm_image_rebuild() { echo 'export FORCE_ANSWER_TO_QUESTIONS="no"' > "${LAST_FORCE_ANSWER_FILE}" elif [[ ${RES} == "2" ]]; then echo >&2 - echo >&2 "ERROR: The image needs to be built for ${THE_IMAGE_TYPE} - it is outdated. " - echo >&2 " Make sure you build the images by running run one of:" - echo >&2 " * ./breeze build-only" - echo >&2 " * ./breeze build-only --force-pull-images" + echo >&2 "ERROR: The ${THE_IMAGE_TYPE} needs to be rebuilt - it is outdated. " + echo >&2 " Make sure you build the images bu running run one of:" + echo >&2 " * PYTHON_VERSION=${PYTHON_VERSION} ./scripts/ci/local_ci_build*.sh" + echo >&2 " * PYTHON_VERSION=${PYTHON_VERSION} ./scripts/ci/local_ci_pull_and_build*.sh" echo >&2 - echo >&2 " The first command works incrementally from your last local build." - echo >&2 " The second command you use if you want to completely refresh your images from dockerhub." - echo >&2 - echo >&2 " If you run it via pre-commit separately, run 'pre-commit run build' first." + echo >&2 " If you run it via pre-commit as individual hook, you can run 'pre-commit run build'." 
echo >&2 exit 1 else @@ -504,14 +541,24 @@ function confirm_image_rebuild() { fi } -function rebuild_image_if_needed() { - AIRFLOW_VERSION=$(cat airflow/version.py - << EOF | python -print(version.replace("+","")) -EOF - ) - export AIRFLOW_VERSION - export BUILT_IMAGE_FLAG_FILE="${BUILD_CACHE_DIR}/${BRANCH_NAME}/.built_${PYTHON_VERSION}" +function set_current_image_variables { + if [[ ${THE_IMAGE_TYPE:=} == "CI" ]]; then + export AIRFLOW_IMAGE="${AIRFLOW_CI_IMAGE}" + export AIRFLOW_IMAGE_DEFAULT="${AIRFLOW_CI_IMAGE_DEFAULT}" + else + export AIRFLOW_IMAGE="" + export AIRFLOW_IMAGE_DEFAULT="" + fi + + if [[ "${PYTHON_VERSION_FOR_DEFAULT_DOCKERHUB_IMAGE}" == "${PYTHON_VERSION}" ]]; then + export DEFAULT_IMAGE="${AIRFLOW_IMAGE_DEFAULT}" + else + export DEFAULT_IMAGE="" + fi +} +function rebuild_image_if_needed() { + set_current_image_variables if [[ -f "${BUILT_IMAGE_FLAG_FILE}" ]]; then print_info print_info "${THE_IMAGE_TYPE} image already built locally." @@ -525,7 +572,6 @@ EOF fi NEEDS_DOCKER_BUILD="false" - IMAGES_TO_REBUILD=() check_if_docker_build_is_needed if [[ "${NEEDS_DOCKER_BUILD}" == "true" ]]; then SKIP_REBUILD="false" @@ -543,8 +589,7 @@ EOF print_info print_info "Build start: ${THE_IMAGE_TYPE} image." print_info - # shellcheck source=hooks/build - ./hooks/build | tee -a "${OUTPUT_LOG}" + build_image update_all_md5_files print_info print_info "Build completed: ${THE_IMAGE_TYPE} image." @@ -552,21 +597,11 @@ EOF fi else print_info - print_info "No need to rebuild - none of the important files changed: ${FILES_FOR_REBUILD_CHECK[*]}" + print_info "No need to build - none of the important files changed: ${FILES_FOR_REBUILD_CHECK[*]}" print_info fi } -# -# Rebuilds the image for tests if needed. 
-# -function rebuild_ci_image_if_needed() { - export THE_IMAGE_TYPE="CI" - export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${DEFAULT_BRANCH}-python${PYTHON_VERSION}-ci" - - rebuild_image_if_needed -} - # # Starts the script/ If VERBOSE_COMMANDS variable is set to true, it enables verbose output of commands executed @@ -612,11 +647,13 @@ function script_end { fi END_SCRIPT_TIME=$(date +%s) RUN_SCRIPT_TIME=$((END_SCRIPT_TIME-START_SCRIPT_TIME)) - print_info - print_info "Finished the script $(basename "$0")" - print_info "Elapsed time spent in the script: ${RUN_SCRIPT_TIME} seconds" - print_info "Exit code ${EXIT_CODE}" - print_info + if [[ ${BREEZE:=} != "true" ]]; then + print_info + print_info "Finished the script $(basename "$0")" + print_info "Elapsed time spent in the script: ${RUN_SCRIPT_TIME} seconds" + print_info "Exit code ${EXIT_CODE}" + print_info + fi remove_cache_directory } @@ -657,22 +694,19 @@ function filter_out_files_from_pylint_todo_list() { export FILTERED_FILES } -function rebuild_all_images_if_needed_and_confirmed() { +function rebuild_ci_image_if_needed_and_confirmed() { NEEDS_DOCKER_BUILD="false" - IMAGES_TO_REBUILD=() + THE_IMAGE_TYPE="CI" - for THE_IMAGE_TYPE in "${IMAGES_TO_CHECK[@]}" - do - check_if_docker_build_is_needed - done + check_if_docker_build_is_needed if [[ ${NEEDS_DOCKER_BUILD} == "true" ]]; then print_info - print_info "Docker image build is needed for ${IMAGES_TO_REBUILD[*]}!" + print_info "Docker image build is needed!" print_info else print_info - print_info "Docker image build is not needed for any of the image types!" + print_info "Docker image build is not needed!" print_info fi @@ -724,8 +758,10 @@ function build_image_on_ci() { "${AIRFLOW_SOURCES}/confirm" "Cleaning docker data and rebuilding" fi + prepare_build + # Cleanup docker installation. 
It should be empty in CI but let's not risk - docker system prune --all --force + verbose_docker system prune --all --force rm -rf "${BUILD_CACHE_DIR}" mkdir -pv "${BUILD_CACHE_DIR}" @@ -834,7 +870,7 @@ function check_and_save_allowed_param { } function run_docs() { - docker run "${EXTRA_DOCKER_FLAGS[@]}" -t \ + verbose_docker run "${EXTRA_DOCKER_FLAGS[@]}" -t \ --entrypoint "/usr/local/bin/dumb-init" \ --env PYTHONDONTWRITEBYTECODE \ --env VERBOSE \ @@ -846,3 +882,224 @@ function run_docs() { "--" "/opt/airflow/docs/build.sh" \ | tee -a "${OUTPUT_LOG}" } + +function pull_image_if_needed() { + # Whether to force pull images to populate cache + export FORCE_PULL_IMAGES=${FORCE_PULL_IMAGES:="false"} + # In CI environment we skip pulling latest python image + export PULL_BASE_IMAGES=${PULL_BASE_IMAGES:="false"} + + if [[ "${DOCKER_CACHE}" == "pulled" ]]; then + if [[ "${FORCE_PULL_IMAGES}" == "true" ]]; then + if [[ ${PULL_BASE_IMAGES} == "false" ]]; then + echo + echo "Skip force-pulling the ${PYTHON_BASE_IMAGE} image." + echo + else + echo + echo "Force pull base image ${PYTHON_BASE_IMAGE}" + echo + verbose_docker pull "${PYTHON_BASE_IMAGE}" + echo + fi + fi + IMAGES="${AIRFLOW_IMAGE}" + for IMAGE in ${IMAGES} + do + local PULL_IMAGE=${FORCE_PULL_IMAGES} + local IMAGE_HASH + IMAGE_HASH=$(verbose_docker images -q "${IMAGE}" 2> /dev/null) + if [[ "${IMAGE_HASH}" == "" ]]; then + PULL_IMAGE="true" + fi + if [[ "${PULL_IMAGE}" == "true" ]]; then + echo + echo "Pulling the image ${IMAGE}" + echo + verbose_docker pull "${IMAGE}" || true + echo + fi + done + fi +} + +function print_build_info() { + print_info + print_info "Airflow ${AIRFLOW_VERSION} Python: ${PYTHON_VERSION}. 
Image description: ${IMAGE_DESCRIPTION}" + print_info +} + +function spin() { + local FILE_TO_MONITOR=${1} + local SPIN=("-" "\\" "|" "/") + echo -n " Build log: ${FILE_TO_MONITOR} ${SPIN[0]}" > "${DETECTED_TERMINAL}" + + while "true" + do + for i in "${SPIN[@]}" + do + echo -ne "\b$i" > "${DETECTED_TERMINAL}" + local LAST_FILE_SIZE + local FILE_SIZE + LAST_FILE_SIZE=$(set +e; wc -c "${FILE_TO_MONITOR}" 2>/dev/null | awk '{print $1}' || true) + FILE_SIZE=${LAST_FILE_SIZE} + while [[ "${LAST_FILE_SIZE}" == "${FILE_SIZE}" ]]; + do + FILE_SIZE=$(set +e; wc -c "${FILE_TO_MONITOR}" 2>/dev/null | awk '{print $1}' || true) + sleep 0.2 + done + LAST_FILE_SIZE=FILE_SIZE + sleep 0.2 + if [[ ! -f "${FILE_TO_MONITOR}" ]]; then + exit + fi + done + done +} + +function build_image() { + print_build_info + echo + echo Building image "${IMAGE_DESCRIPTION}" + echo + pull_image_if_needed + + if [[ "${DOCKER_CACHE}" == "no-cache" ]]; then + export DOCKER_CACHE_CI_DIRECTIVE=("--no-cache") + elif [[ "${DOCKER_CACHE}" == "local" ]]; then + export DOCKER_CACHE_CI_DIRECTIVE=() + elif [[ "${DOCKER_CACHE}" == "pulled" ]]; then + export DOCKER_CACHE_CI_DIRECTIVE=( + "--cache-from" "${AIRFLOW_CI_IMAGE}" + ) + else + echo 2>&1 + echo 2>&1 "Error - thee ${DOCKER_CACHE} cache is unknown!" + echo 2>&1 + exit 1 + fi + if [[ -n ${DETECTED_TERMINAL:=""} ]]; then + echo -n "Building ${THE_IMAGE_TYPE}. + " > "${DETECTED_TERMINAL}" + spin "${OUTPUT_LOG}" & + SPIN_PID=$! 
+ # shellcheck disable=SC2064 + trap "kill ${SPIN_PID}" SIGINT SIGTERM + fi + if [[ ${THE_IMAGE_TYPE} == "CI" ]]; then + set +u + verbose_docker build \ + --build-arg PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE}" \ + --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \ + --build-arg AIRFLOW_BRANCH="${BRANCH_NAME}" \ + --build-arg AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS}" \ + --build-arg AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" \ + "${DOCKER_CACHE_CI_DIRECTIVE[@]}" \ + -t "${AIRFLOW_CI_IMAGE}" \ + --target "${TARGET_IMAGE}" \ + . | tee -a "${OUTPUT_LOG}" + set -u + fi + if [[ -n "${DEFAULT_IMAGE:=}" ]]; then + verbose_docker tag "${AIRFLOW_IMAGE}" "${DEFAULT_IMAGE}" | tee -a "${OUTPUT_LOG}" + fi + if [[ -n ${SPIN_PID:=""} ]]; then + kill "${SPIN_PID}" || true + wait "${SPIN_PID}" || true + echo > "${DETECTED_TERMINAL}" + fi +} + +function remove_all_images() { + echo + "${AIRFLOW_SOURCES}/confirm" "Removing all local images ." + echo + verbose_docker rmi "${PYTHON_BASE_IMAGE}" || true + verbose_docker rmi "${AIRFLOW_CI_IMAGE}" || true + echo + echo "###################################################################" + echo "NOTE!! Removed Airflow images for Python version ${PYTHON_VERSION}." + echo " But the disk space in docker will be reclaimed only after" + echo " running 'docker system prune' command." + echo "###################################################################" + echo +} + +# Fixing permissions for all important files that are going to be added to Docker context +# This is necessary, because there are different default umask settings on different *NIX +# In case of some systems (especially in the CI environments) there is default +w group permission +# set automatically via UMASK when git checkout is performed. 
+# https://unix.stackexchange.com/questions/315121/why-is-the-default-umask-002-or-022-in-many-unix-systems-seems-insecure-by-defa +# Unfortunately default setting in git is to use UMASK by default: +# https://git-scm.com/docs/git-config/1.6.3.1#git-config-coresharedRepository +# This messes around with Docker context invalidation because the same files have different permissions +# and effectively different hash used for context validation calculation. +# +# We fix it by removing write permissions for other/group for all files that are in the Docker context. +# +# Since we can't (easily) tell what dockerignore would restrict, we'll just do +# it to "all" files in the git repo, making sure to exclude the www/static/docs +# symlink which is broken until the docs are built. +function filterout_deleted_files { + # Take NUL-separated stdin, return only files that exist on stdout NUL-separated + # This is to cope with users deleting files or folders locally but not doing `git rm` + xargs -0 "$STAT_BIN" --printf '%n\0' 2>/dev/null || true; +} + +function fix_group_permissions() { + if [[ ${PERMISSIONS_FIXED:=} == "true" ]]; then + echo + echo "Permissions already fixed" + echo + return + fi + echo + echo "Fixing group permissions" + pushd "${AIRFLOW_SOURCES}" >/dev/null + # This deals with files + git ls-files -z -- ./ | filterout_deleted_files | xargs -0 chmod og-w + # and this deals with directories + git ls-tree -z -r -d --name-only HEAD | filterout_deleted_files | xargs -0 chmod og-w,og+x + popd >/dev/null + echo "Fixed group permissions" + echo + export PERMISSIONS_FIXED="true" +} + +function set_common_image_variables { + export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${DEFAULT_BRANCH}-python${PYTHON_VERSION}-ci" + export AIRFLOW_CI_SAVED_IMAGE_DIR="${BUILD_CACHE_DIR}/${DEFAULT_BRANCH}-python${PYTHON_VERSION}-ci-image" + export AIRFLOW_CI_IMAGE_ID_FILE="${BUILD_CACHE_DIR}/${DEFAULT_BRANCH}-python${PYTHON_VERSION}-ci-image.sha256" + export
AIRFLOW_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${DEFAULT_BRANCH}-ci" + export PYTHON_BASE_IMAGE="python:${PYTHON_VERSION}-slim-stretch" + export BUILT_IMAGE_FLAG_FILE="${BUILD_CACHE_DIR}/${BRANCH_NAME}/.built_${PYTHON_VERSION}" + +} + + +function prepare_build() { + set_common_image_variables + go_to_airflow_sources + fix_group_permissions +} + +push_image() { + verbose_docker push "${AIRFLOW_IMAGE}" + if [[ -n ${DEFAULT_IMAGE:=""} ]]; then + verbose_docker push "${DEFAULT_IMAGE}" + fi +} + +function rebuild_ci_image_if_needed() { + export THE_IMAGE_TYPE="CI" + export IMAGE_DESCRIPTION="Airflow CI" + export TARGET_IMAGE="main" + export AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="true" + export AIRFLOW_EXTRAS="devel_ci" + rebuild_image_if_needed +} + +function push_ci_image() { + export THE_IMAGE_TYPE="CI" + push_image +} diff --git a/scripts/ci/ci_build_dockerhub.sh b/scripts/ci/ci_build_dockerhub.sh new file mode 100755 index 0000000000000..58e7c0c110a8d --- /dev/null +++ b/scripts/ci/ci_build_dockerhub.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This is hook build used by DockerHub. 
We are also using it +# on Travis CI to potentially rebuild (and refresh layers that +# are not cached) Docker images that are used to run CI jobs +export FORCE_ANSWER_TO_QUESTIONS="yes" +export PULL_BASE_IMAGES="true" + +# In case of CRON jobs on Travis we run builds without cache +if [[ "${TRAVIS_EVENT_TYPE:=}" == "cron" ]]; then + echo + echo "Disabling cache for CRON jobs" + echo + export DOCKER_CACHE="no-cache" + export PULL_BASE_IMAGES="true" +fi + +if [[ -z ${DOCKER_TAG:=} ]]; then + echo + echo "Error! Missing DOCKER_TAG environment variable" + echo "Please specify DOCKER_TAG variable following the pattern BRANCH-pythonX.Y[-ci]" + echo + exit 1 +fi + +[[ ${DOCKER_TAG:=} =~ ${DEFAULT_BRANCH}-python([0-9.]*) ]] && export PYTHON_VERSION=${BASH_REMATCH[1]} + +if [[ -z ${PYTHON_VERSION:=} ]]; then + echo + echo "Error! Wrong DOCKER_TAG" + echo "The tag '${DOCKER_TAG}' should follow the pattern ${DEFAULT_BRANCH}-pythonX.Y[-ci]" + echo + exit 1 +fi + +echo "Detected PYTHON_VERSION=${PYTHON_VERSION}" +echo + +# shellcheck source=scripts/ci/_script_init.sh +. 
"$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" + +prepare_build +rm -rf "${BUILD_CACHE_DIR}" + +if [[ ${DOCKER_TAG} == *-ci ]]; then + rebuild_ci_image_if_needed + push_ci_image +fi diff --git a/scripts/ci/ci_check_license.sh b/scripts/ci/ci_check_license.sh index 846dc7847c0f2..b90f5312f06dc 100755 --- a/scripts/ci/ci_check_license.sh +++ b/scripts/ci/ci_check_license.sh @@ -35,6 +35,8 @@ function run_check_license() { | tee -a "${OUTPUT_LOG}" } +prepare_build + rebuild_ci_image_if_needed run_check_license diff --git a/scripts/ci/ci_docs.sh b/scripts/ci/ci_docs.sh index 3e29872fcc4c0..c36c6459421b8 100755 --- a/scripts/ci/ci_docs.sh +++ b/scripts/ci/ci_docs.sh @@ -34,6 +34,8 @@ function run_docs() { | tee -a "${OUTPUT_LOG}" } +prepare_build + rebuild_ci_image_if_needed run_docs diff --git a/scripts/ci/ci_flake8.sh b/scripts/ci/ci_flake8.sh index 93e99f8ebcd28..2706b8a79fdc1 100755 --- a/scripts/ci/ci_flake8.sh +++ b/scripts/ci/ci_flake8.sh @@ -50,6 +50,7 @@ function run_flake8() { fi } +prepare_build rebuild_ci_image_if_needed diff --git a/scripts/ci/ci_mypy.sh b/scripts/ci/ci_mypy.sh index 035a3bbfc2ced..60a9bd7623b2f 100755 --- a/scripts/ci/ci_mypy.sh +++ b/scripts/ci/ci_mypy.sh @@ -20,8 +20,6 @@ export PYTHON_VERSION=${PYTHON_VERSION:-3.6} # shellcheck source=scripts/ci/_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" -rebuild_ci_image_if_needed - function run_mypy() { FILES=("$@") if [[ "${#FILES[@]}" == "0" ]]; then @@ -42,4 +40,8 @@ function run_mypy() { | tee -a "${OUTPUT_LOG}" } +prepare_build + +rebuild_ci_image_if_needed + run_mypy "$@" diff --git a/scripts/ci/ci_prepare_backport_packages.sh b/scripts/ci/ci_prepare_backport_packages.sh index bd6f96d8d056d..da62a7d0a676e 100755 --- a/scripts/ci/ci_prepare_backport_packages.sh +++ b/scripts/ci/ci_prepare_backport_packages.sh @@ -15,25 +15,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- -set -euo pipefail - -MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -export AIRFLOW_CI_SILENT=${AIRFLOW_CI_SILENT:="true"} - export PYTHON_VERSION=${PYTHON_VERSION:-3.6} -# shellcheck source=scripts/ci/_utils.sh -. "${MY_DIR}/_utils.sh" +# shellcheck source=scripts/ci/_script_init.sh +. "$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" -initialize_breeze_environment - -basic_sanity_checks - -script_start - -cd "${MY_DIR}/../../backport_packages" +cd "${MY_DIR}/../../backport_packages" || exit 1 rm -rf dist/* rm -rf -- *.egg-info @@ -68,5 +55,3 @@ echo "Packages are prepared in ${DUMP_FILE}" if [[ "${CI:=false}" == "true" ]]; then curl -F "file=@${DUMP_FILE}" https://file.io fi - -script_end diff --git a/scripts/ci/ci_pylint_main.sh b/scripts/ci/ci_pylint_main.sh index 13be28e14f2b2..0815788556713 100755 --- a/scripts/ci/ci_pylint_main.sh +++ b/scripts/ci/ci_pylint_main.sh @@ -49,6 +49,8 @@ function run_pylint_main() { fi } +prepare_build + rebuild_ci_image_if_needed if [[ "${#@}" != "0" ]]; then @@ -62,5 +64,3 @@ if [[ "${#@}" != "0" ]]; then else run_pylint_main fi - -script_end diff --git a/scripts/ci/ci_pylint_tests.sh b/scripts/ci/ci_pylint_tests.sh index 6b2f5bb15a6d1..c9b9cbcc77422 100755 --- a/scripts/ci/ci_pylint_tests.sh +++ b/scripts/ci/ci_pylint_tests.sh @@ -50,6 +50,7 @@ function run_pylint_tests() { fi } +prepare_build rebuild_ci_image_if_needed diff --git a/scripts/ci/ci_refresh_pylint_todo.sh b/scripts/ci/ci_refresh_pylint_todo.sh index 2c431bc1e4c57..16fec0e6c5ce7 100755 --- a/scripts/ci/ci_refresh_pylint_todo.sh +++ b/scripts/ci/ci_refresh_pylint_todo.sh @@ -33,6 +33,8 @@ function refresh_pylint_todo() { | tee -a "${OUTPUT_LOG}" } +prepare_build + rebuild_ci_image_if_needed refresh_pylint_todo diff --git a/scripts/ci/ci_run_airflow_testing.sh b/scripts/ci/ci_run_airflow_testing.sh index f0de3a93e6c4f..b5fbef2555edc 100755 --- a/scripts/ci/ci_run_airflow_testing.sh +++ b/scripts/ci/ci_run_airflow_testing.sh @@ -24,10 +24,11 @@ if 
[[ -f ${BUILD_CACHE_DIR}/.skip_tests ]]; then echo echo "Skipping running tests !!!!!" echo - script_end exit fi +prepare_build + rebuild_ci_image_if_needed # Test environment diff --git a/scripts/ci/ci_run_all_static_checks.sh b/scripts/ci/ci_run_all_static_checks.sh index 129a4832b3b0c..65b2e8576aeae 100755 --- a/scripts/ci/ci_run_all_static_checks.sh +++ b/scripts/ci/ci_run_all_static_checks.sh @@ -24,13 +24,11 @@ if [[ -f ${BUILD_CACHE_DIR}/.skip_tests ]]; then echo echo "Skip tests" echo - script_end exit fi -rebuild_ci_image_if_needed +prepare_build -IMAGES_TO_CHECK=("CI") -export IMAGES_TO_CHECK +rebuild_ci_image_if_needed pre-commit run --all-files --show-diff-on-failure --verbose diff --git a/scripts/ci/ci_run_static_checks_pylint_tests.sh b/scripts/ci/ci_run_static_checks_pylint_tests.sh index 8cfb87527bea9..9fd3141e94faa 100755 --- a/scripts/ci/ci_run_static_checks_pylint_tests.sh +++ b/scripts/ci/ci_run_static_checks_pylint_tests.sh @@ -24,13 +24,11 @@ if [[ -f ${BUILD_CACHE_DIR}/.skip_tests ]]; then echo echo "Skip tests" echo - script_end exit fi -rebuild_ci_image_if_needed +prepare_build -IMAGES_TO_CHECK=("CI") -export IMAGES_TO_CHECK +rebuild_ci_image_if_needed pre-commit run pylint-tests --all-files --show-diff-on-failure --verbose diff --git a/scripts/ci/in_container/_in_container_utils.sh b/scripts/ci/in_container/_in_container_utils.sh index 11aa40bf99b41..33e659d3f9daa 100644 --- a/scripts/ci/in_container/_in_container_utils.sh +++ b/scripts/ci/in_container/_in_container_utils.sh @@ -49,60 +49,34 @@ function in_container_script_end() { fi } -function print_in_container_info() { - if [[ ${VERBOSE} == "true" ]]; then - echo "$@" - fi -} - # # Cleans up PYC files (in case they come in mounted folders) # function in_container_cleanup_pyc() { - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Cleaning up .pyc files" - print_in_container_info - fi set +o pipefail - NUM_FILES=$(sudo find . \ + sudo find . 
\ -path "./airflow/www/node_modules" -prune -o \ -path "./airflow/www_rbac/node_modules" -prune -o \ -path "./.eggs" -prune -o \ -path "./docs/_build" -prune -o \ -path "./build" -prune -o \ - -name "*.pyc" | grep ".pyc$" | sudo xargs rm -vf | wc -l) + -name "*.pyc" | grep ".pyc$" | sudo xargs rm -vf set -o pipefail - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info "Number of deleted .pyc files: ${NUM_FILES}" - print_in_container_info - print_in_container_info - fi } # # Cleans up __pycache__ directories (in case they come in mounted folders) # function in_container_cleanup_pycache() { - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Cleaning up __pycache__ directories" - print_in_container_info - fi set +o pipefail - NUM_FILES=$(find . \ + find . \ -path "./airflow/www/node_modules" -prune -o \ -path "./airflow/www_rbac/node_modules" -prune -o \ -path "./.eggs" -prune -o \ -path "./docs/_build" -prune -o \ -path "./build" -prune -o \ - -name "__pycache__" | grep "__pycache__" | sudo xargs rm -rvf | wc -l) + -name "__pycache__" | grep "__pycache__" | sudo xargs rm -rvf set -o pipefail - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info "Number of deleted __pycache__ dirs (and files): ${NUM_FILES}" - print_in_container_info - print_in_container_info - fi } # @@ -110,28 +84,13 @@ function in_container_cleanup_pycache() { # The host user. # function in_container_fix_ownership() { - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Changing ownership of root-owned files to ${HOST_USER_ID}.${HOST_GROUP_ID}" - print_in_container_info - fi set +o pipefail - sudo find . -user root | sudo xargs chown -v "${HOST_USER_ID}.${HOST_GROUP_ID}" --no-dereference | \ - wc -l | xargs -n 1 echo "Number of files with changed ownership:" + sudo find . 
-user root | sudo xargs chown -v "${HOST_USER_ID}.${HOST_GROUP_ID}" --no-dereference set -o pipefail - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info - fi } function in_container_go_to_airflow_sources() { pushd "${AIRFLOW_SOURCES}" &>/dev/null || exit 1 - if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Running in $(pwd)" - print_in_container_info - fi } function in_container_basic_sanity_check() { @@ -143,13 +102,13 @@ function in_container_basic_sanity_check() { function in_container_refresh_pylint_todo() { if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Refreshing list of all non-pylint compliant files. This can take some time." - print_in_container_info + echo + echo "Refreshing list of all non-pylint compliant files. This can take some time." + echo - print_in_container_info - print_in_container_info "Finding list all non-pylint compliant files everywhere except 'tests' folder" - print_in_container_info + echo + echo "Finding list all non-pylint compliant files everywhere except 'tests' folder" + echo fi # Using path -prune is much better in the local environment on OSX because we have host # Files mounted and node_modules is a huge directory which takes many seconds to even scan @@ -174,13 +133,13 @@ function in_container_refresh_pylint_todo() { awk 'BEGIN{FS=":"}{print "./"$1}' | sort | uniq > "${MY_DIR}/../pylint_todo_new.txt" if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "So far found $(wc -l <"${MY_DIR}/../pylint_todo_new.txt") files" - print_in_container_info + echo + echo "So far found $(wc -l <"${MY_DIR}/../pylint_todo_new.txt") files" + echo - print_in_container_info - print_in_container_info "Finding list of all non-pylint compliant files in 'tests' folder" - print_in_container_info + echo + echo "Finding list of all non-pylint compliant files in 'tests' folder" + echo fi find "./tests" 
-name "*.py" -print0 | \ xargs -0 pylint --disable="${DISABLE_CHECKS_FOR_TESTS}" | tee "${MY_DIR}/../pylint_todo_tests.txt" @@ -193,9 +152,9 @@ function in_container_refresh_pylint_todo() { mv -v "${MY_DIR}/../pylint_todo_new.txt" "${MY_DIR}/../pylint_todo.txt" if [[ ${VERBOSE} == "true" ]]; then - print_in_container_info - print_in_container_info "Found $(wc -l <"${MY_DIR}/../pylint_todo.txt") files" - print_in_container_info + echo + echo "Found $(wc -l <"${MY_DIR}/../pylint_todo.txt") files" + echo fi } diff --git a/scripts/ci/in_container/entrypoint_ci.sh b/scripts/ci/in_container/entrypoint_ci.sh index 2da2a1806f347..92484dbe63411 100755 --- a/scripts/ci/in_container/entrypoint_ci.sh +++ b/scripts/ci/in_container/entrypoint_ci.sh @@ -150,7 +150,14 @@ if [[ "${RUNTIME}" == "" ]]; then # SSH Service sudo service ssh restart >/dev/null 2>&1 - ssh-keyscan -H localhost >> ~/.ssh/known_hosts + + # Sometimes the server is not quick enough to load the keys! + while [[ $(ssh-keyscan -H localhost 2>/dev/null | wc -l) != "3" ]] ; do + echo "Not all keys yet loaded by the server" + sleep 0.05 + done + + ssh-keyscan -H localhost >> ~/.ssh/known_hosts 2>/dev/null fi diff --git a/scripts/ci/in_container/run_flake8.sh b/scripts/ci/in_container/run_flake8.sh index 288cea9db22ad..05b8abad17812 100755 --- a/scripts/ci/in_container/run_flake8.sh +++ b/scripts/ci/in_container/run_flake8.sh @@ -18,16 +18,6 @@ # shellcheck source=scripts/ci/in_container/_in_container_script_init.sh . 
"$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh" -if [[ ${#@} == "0" ]]; then - print_in_container_info - print_in_container_info "Running flake8 with no parameters" - print_in_container_info -else - print_in_container_info - print_in_container_info "Running flake8 with parameters: $*" - print_in_container_info -fi - set +e flake8 "$@" diff --git a/scripts/ci/in_container/run_mypy.sh b/scripts/ci/in_container/run_mypy.sh index c57121d131f8e..5579288527cda 100755 --- a/scripts/ci/in_container/run_mypy.sh +++ b/scripts/ci/in_container/run_mypy.sh @@ -18,12 +18,6 @@ # Script to run mypy on all code. Can be started from any working directory # shellcheck source=scripts/ci/in_container/_in_container_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh" - -print_in_container_info -print_in_container_info "Running mypy with parameters: $*" -print_in_container_info -print_in_container_info - set +e mypy "$@" diff --git a/scripts/ci/in_container/run_pylint_main.sh b/scripts/ci/in_container/run_pylint_main.sh index 09de88b403e15..bb3188b00202b 100755 --- a/scripts/ci/in_container/run_pylint_main.sh +++ b/scripts/ci/in_container/run_pylint_main.sh @@ -46,10 +46,6 @@ if [[ ${#@} == "0" ]]; then grep -vFf scripts/ci/pylint_todo.txt | xargs pylint --output-format=colorized RES=$? else - print_in_container_info - print_in_container_info "Running Pylint with parameters: $*" - print_in_container_info - echo "PATH=${PATH}" /usr/local/bin/pylint --output-format=colorized "$@" RES=$? fi diff --git a/scripts/ci/in_container/run_pylint_tests.sh b/scripts/ci/in_container/run_pylint_tests.sh index 4b07762428396..c9fc35ee4d9d0 100755 --- a/scripts/ci/in_container/run_pylint_tests.sh +++ b/scripts/ci/in_container/run_pylint_tests.sh @@ -32,9 +32,6 @@ if [[ ${#@} == "0" ]]; then xargs pylint -j 0 --disable="${DISABLE_CHECKS_FOR_TESTS}" --output-format=colorized RES=$? 
else - print_in_container_info - print_in_container_info "Running Pylint for tests with parameters: $*" - print_in_container_info # running pylint using built-in parallel functionality might speed it up pylint -j 0 --disable="${DISABLE_CHECKS_FOR_TESTS}" --output-format=colorized "$@" RES=$? diff --git a/scripts/ci/pre_commit_ci_build.sh b/scripts/ci/pre_commit_ci_build.sh index 1ed9490356275..d612332333dad 100755 --- a/scripts/ci/pre_commit_ci_build.sh +++ b/scripts/ci/pre_commit_ci_build.sh @@ -21,4 +21,6 @@ export PYTHON_VERSION=${PYTHON_VERSION:-3.6} # shellcheck source=scripts/ci/_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" -rebuild_all_images_if_needed_and_confirmed +prepare_build + +rebuild_ci_image_if_needed_and_confirmed diff --git a/scripts/ci/pre_commit_setup_cfg_file.sh b/scripts/ci/pre_commit_setup_cfg_file.sh index e11eafddc2bcf..88ff838792b24 100755 --- a/scripts/ci/pre_commit_setup_cfg_file.sh +++ b/scripts/ci/pre_commit_setup_cfg_file.sh @@ -25,7 +25,6 @@ TMP_OUTPUT=$(mktemp) cd "${MY_DIR}/../../" || exit; -export AIRFLOW_CI_SILENT="true" find "licenses" -type f -exec echo " " {} \; | sort >>"${TMP_FILE}" SETUP_CFG_FILE="${MY_DIR}/../../setup.cfg"