diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cbab48546b8..ff569fcfe171 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1366,7 +1366,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" push-buildx-cache-to-github-registry: permissions: packages: write - timeout-minutes: 40 + timeout-minutes: 120 name: "Push images as cache to GitHub Registry" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} needs: @@ -1383,6 +1383,9 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + # Build cache for both platforms for development even if we are releasing + # PROD images only for amd64 + PLATFORM: "linux/amd64,linux/arm64" # Rebuild images before push using the latest constraints (just pushed) without # eager upgrade. Do not wait for images, but rebuild them UPGRADE_TO_NEWER_DEPENDENCIES: "false" diff --git a/BREEZE.rst b/BREEZE.rst index 128347364cd0..3ce3068a1258 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -1277,7 +1277,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1492,7 +1492,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1567,7 +1567,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1649,7 +1649,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1700,7 +1700,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1910,7 +1910,7 @@ This is the 
current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -1994,7 +1994,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION @@ -2409,7 +2409,7 @@ This is the current syntax for `./breeze <./breeze>`_: One of: - linux/amd64 + linux/amd64 linux/arm64 linux/amd64,linux/arm64 -d, --debian DEBIAN_VERSION diff --git a/Dockerfile b/Dockerfile index 6efb10aa05e0..bea8b1a25f81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -482,16 +482,8 @@ ARG AIRFLOW_VERSION # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation # to learn more about the way how signals are handled by the image # Also set airflow as nice PROMPT message. -# LD_PRELOAD is to workaround https://github.com/apache/airflow/issues/17546 -# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block -# We do not yet a more "correct" solution to the problem but in order to avoid raising new issues -# by users of the prod image, we implement the workaround now. 
-# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any -# binary started and a little memory used for Heap allocated by initialization of libstdc++ -# This overhead is not happening for binaries that already link dynamically libstdc++ ENV DUMB_INIT_SETSID="1" \ PS1="(airflow)" \ - LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6" \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW__CORE__LOAD_EXAMPLES="false" \ PIP_USER="true" diff --git a/Dockerfile.ci b/Dockerfile.ci index ea782dcee8eb..18fb20feefb3 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -138,8 +138,9 @@ ARG RUNTIME_APT_DEPS="\ ARG HELM_VERSION="v3.6.3" RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \ - && HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-amd64.tar.gz" \ - && curl --silent --location "${HELM_URL}" | tar -xz -O "${SYSTEM}"-amd64/helm > /usr/local/bin/helm \ + && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \ + && HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \ + && curl --silent --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \ && chmod +x /usr/local/bin/helm ARG ADDITIONAL_RUNTIME_APT_DEPS="" @@ -370,15 +371,6 @@ ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \ BUILD_ID=${BUILD_ID} \ COMMIT_SHA=${COMMIT_SHA} -# This one is to workaround https://github.com/apache/airflow/issues/17546 -# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block -# We do not yet a more "correct" solution to the problem but in order to avoid raising new issues -# by users of the prod image, we implement the workaround now. 
-# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any -# binary started and a little memory used for Heap allocated by initialization of libstdc++ -# This overhead is not happening for binaries that already link dynamically libstdc++ -ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6" - # Link dumb-init for backwards compatibility (so that older images also work) RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init diff --git a/README.md b/README.md index fa90fefe2546..e30361470ee2 100644 --- a/README.md +++ b/README.md @@ -88,11 +88,14 @@ Apache Airflow is tested with: | | Main version (dev) | Stable version (2.2.4) | |---------------------|---------------------|--------------------------| | Python | 3.7, 3.8, 3.9 | 3.6, 3.7, 3.8, 3.9 | +| Platform | AMD64/ARM64(\*) | AMD64 | | Kubernetes | 1.20, 1.21 | 1.18, 1.19, 1.20 | | PostgreSQL | 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | | MySQL | 5.7, 8 | 5.7, 8 | | SQLite | 3.15.0+ | 3.15.0+ | -| MSSQL(Experimental) | 2017, 2019 | | +| MSSQL | 2017(\*), 2019 (\*) | | + +\* Experimental **Note**: MySQL 5.x versions are unable to or have limitations with running multiple schedulers -- please see the [Scheduler docs](https://airflow.apache.org/docs/apache-airflow/stable/scheduler.html). 
diff --git a/airflow/providers/google/cloud/hooks/cloud_sql.py b/airflow/providers/google/cloud/hooks/cloud_sql.py index aa417c4b1032..b5225dea07ac 100644 --- a/airflow/providers/google/cloud/hooks/cloud_sql.py +++ b/airflow/providers/google/cloud/hooks/cloud_sql.py @@ -451,7 +451,9 @@ def _download_sql_proxy_if_needed(self) -> None: self.log.info("cloud-sql-proxy is already present") return system = platform.system().lower() - processor = "amd64" if CloudSqlProxyRunner._is_os_64bit() else "386" + is_arm = platform.machine().lower() in ("arm64", "aarch64") + # published proxy binaries are suffixed arm64 / amd64 / 386, not with raw `uname -m` values + processor = "arm64" if is_arm else ("amd64" if CloudSqlProxyRunner._is_os_64bit() else "386") if not self.sql_proxy_version: download_url = CLOUD_SQL_PROXY_DOWNLOAD_URL.format(system, processor) else: diff --git a/breeze b/breeze index a6e331789bd9..4b1dae501d80 100755 --- a/breeze +++ b/breeze @@ -3389,7 +3389,6 @@ function breeze::run_build_command() { build_images::prepare_prod_build build_images::build_prod_images else - build_images::prepare_ci_build build_images::rebuild_ci_image_if_needed fi @@ -3500,6 +3499,12 @@ function breeze::run_breeze_command() { case "${command_to_run}" in enter_breeze) docker_engine_resources::check_all_resources + if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + if [[ ${BACKEND} == "mysql" || ${BACKEND} == "mssql" ]]; then + echo "${COLOR_RED}ARM platform is not supported for ${BACKEND} backend. Exiting.${COLOR_RESET}" + exit 1 + fi + fi if [[ ${PRODUCTION_IMAGE} == "true" ]]; then echo "${COLOR_RED}ERROR: Entering production image via breeze is not supported${COLOR_RESET}" echo diff --git a/breeze-complete b/breeze-complete index f9fbc4d71ddd..a3b8f1bcc66d 100644 --- a/breeze-complete +++ b/breeze-complete @@ -39,7 +39,7 @@ _breeze_allowed_executors="KubernetesExecutor CeleryExecutor LocalExecutor Celer _breeze_allowed_test_types="All Always Core Providers API CLI Integration Other WWW Postgres MySQL Helm Quarantined" _breeze_allowed_package_formats="both sdist wheel" _breeze_allowed_installation_methods=". 
apache-airflow" -_breeze_allowed_platforms="linux/amd64" +_breeze_allowed_platforms="linux/amd64 linux/arm64 linux/amd64,linux/arm64" # shellcheck disable=SC2034 { diff --git a/chart/dockerfiles/pgbouncer-exporter/Dockerfile b/chart/dockerfiles/pgbouncer-exporter/Dockerfile index 7b2a839535fd..121c15cff858 100644 --- a/chart/dockerfiles/pgbouncer-exporter/Dockerfile +++ b/chart/dockerfiles/pgbouncer-exporter/Dockerfile @@ -23,9 +23,10 @@ WORKDIR /usr/src/myapp SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] -RUN URL="https://github.com/jbub/pgbouncer_exporter/archive/v${PGBOUNCER_EXPORTER_VERSION}.tar.gz" && \ - curl -L "${URL}" | tar -zx --strip-components 1 && \ - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -v +RUN URL="https://github.com/jbub/pgbouncer_exporter/archive/v${PGBOUNCER_EXPORTER_VERSION}.tar.gz" \ + && curl -L "${URL}" | tar -zx --strip-components 1 \ + && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" )\ + && GOOS=linux GOARCH="${PLATFORM}" CGO_ENABLED=0 go build -v FROM alpine:${ALPINE_VERSION} AS final diff --git a/dev/REFRESHING_CI_CACHE.md b/dev/REFRESHING_CI_CACHE.md index aca3ace762e0..8d2209d6a141 100644 --- a/dev/REFRESHING_CI_CACHE.md +++ b/dev/REFRESHING_CI_CACHE.md @@ -78,7 +78,16 @@ git push # Manually refreshing the images Note that in order to refresh images you have to not only have `buildx` command installed for docker, -but you should also make sure that you have the buildkit builder configured and set. +but you should also make sure that you have the buildkit builder configured and set. Since we also build +multi-platform images (for both AMD and ARM), you need to have support for qemu installed with appropriate +flags. 
+ +According to the [official installation instructions](https://docs.docker.com/buildx/working-with-buildx/#build-multi-platform-images) +this can be achieved via: + +```shell +docker run --privileged --rm tonistiigi/binfmt --install all +``` More information can be found [here](https://docs.docker.com/engine/reference/commandline/buildx_create/) diff --git a/dev/breeze/src/airflow_breeze/ci/build_params.py b/dev/breeze/src/airflow_breeze/ci/build_params.py index 493d86c3cce4..b9bb4905dc84 100644 --- a/dev/breeze/src/airflow_breeze/ci/build_params.py +++ b/dev/breeze/src/airflow_breeze/ci/build_params.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +import os from dataclasses import dataclass from datetime import datetime from typing import List, Optional @@ -56,7 +56,8 @@ class BuildParams: additional_runtime_apt_command: str = "" additional_runtime_apt_deps: str = "" additional_runtime_apt_env: str = "" - platform: str = "linux/amd64" + # Normalize `uname -m` (x86_64 / aarch64 / arm64) to the platform names Breeze accepts + platform: str = f"linux/{'arm64' if os.uname().machine in ('arm64', 'aarch64') else 'amd64'}" debian_version: str = "bullseye" upgrade_to_newer_dependencies: str = "true" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 48c8f5d04ea1..b42ce2e68d27 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import os from pathlib import Path from typing import List @@ -197,7 +198,8 @@ def get_available_packages() -> List[str]: # Initialise base variables -DOCKER_DEFAULT_PLATFORM = "linux/amd64" +# Normalize `uname -m` (x86_64 / aarch64 / arm64) to the platform names Breeze accepts +DOCKER_DEFAULT_PLATFORM = f"linux/{'arm64' if os.uname().machine in ('arm64', 'aarch64') else 'amd64'}" DOCKER_BUILDKIT = 1 SSH_PORT = "12322" diff --git a/dev/refresh_images.sh b/dev/refresh_images.sh index 8ee6b48d5abe..c883a28f95cd 100755 --- a/dev/refresh_images.sh +++ b/dev/refresh_images.sh @@ -31,5 +31,5 @@ fi python_version=$1 -./breeze prepare-build-cache --python "${python_version}" --verbose -./breeze prepare-build-cache --python "${python_version}" --production-image --verbose +./breeze prepare-build-cache --python "${python_version}" --platform linux/amd64,linux/arm64 --verbose +./breeze prepare-build-cache --python "${python_version}" --platform linux/amd64,linux/arm64 --production-image --verbose diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh index 80f6e811b0f9..09beaee3db11 100644 --- a/scripts/ci/libraries/_build_images.sh +++ b/scripts/ci/libraries/_build_images.sh @@ -422,19 +422,19 @@ function build_images::rebuild_ci_image_if_needed_with_group() { } # Builds CI image - depending on the caching strategy (pulled, local, disabled) it -# passes the necessary docker build flags via docker_ci_cache_directive array +# passes the necessary docker build flags via docker_ci_directive array # it also passes the right Build args depending on the configuration of the build # selected by Breeze flags or environment variables. 
function build_images::build_ci_image() { build_images::check_if_buildx_plugin_available build_images::print_build_info - local docker_ci_cache_directive + local docker_ci_directive if [[ "${DOCKER_CACHE}" == "disabled" ]]; then - docker_ci_cache_directive=("--no-cache") + docker_ci_directive=("--no-cache") elif [[ "${DOCKER_CACHE}" == "local" ]]; then - docker_ci_cache_directive=() + docker_ci_directive=() elif [[ "${DOCKER_CACHE}" == "pulled" ]]; then - docker_ci_cache_directive=( + docker_ci_directive=( "--cache-from=${AIRFLOW_CI_IMAGE}:cache" ) else @@ -446,10 +446,19 @@ function build_images::build_ci_image() { if [[ ${PREPARE_BUILDX_CACHE} == "true" ]]; then # we need to login to docker registry so that we can push cache there build_images::login_to_docker_registry - docker_ci_cache_directive+=( + docker_ci_directive+=( "--cache-to=type=registry,ref=${AIRFLOW_CI_IMAGE}:cache" - "--load" + "--push" ) + if [[ ${PLATFORM} =~ .*,.* ]]; then + echo + echo "Skip loading docker image on multi-platform build" + echo + else + docker_ci_directive+=( + "--load" + ) + fi fi local extra_docker_ci_flags=() if [[ ${CI} == "true" ]]; then @@ -506,14 +515,10 @@ function build_images::build_ci_image() { --build-arg COMMIT_SHA="${COMMIT_SHA}" \ "${additional_dev_args[@]}" \ "${additional_runtime_args[@]}" \ - "${docker_ci_cache_directive[@]}" \ + "${docker_ci_directive[@]}" \ -t "${AIRFLOW_CI_IMAGE}" \ --target "main" \ . 
. -f Dockerfile.ci - if [[ ${PREPARE_BUILDX_CACHE} == "true" ]]; then - # Push the image as "latest" so that it can be used in Breeze - docker_v push "${AIRFLOW_CI_IMAGE}" - fi set -u if [[ -n "${IMAGE_TAG=}" ]]; then echo "Tagging additionally image ${AIRFLOW_CI_IMAGE} with ${IMAGE_TAG}" @@ -573,7 +578,7 @@ function build_images::prepare_prod_build() { } # Builds PROD image - depending on the caching strategy (pulled, local, disabled) it -# passes the necessary docker build flags via DOCKER_CACHE_PROD_DIRECTIVE and +# passes the necessary docker build flags via docker_prod_directive and # docker_cache_prod_build_directive (separate caching options are needed for "build" segment of the image) # it also passes the right Build args depending on the configuration of the build # selected by Breeze flags or environment variables. @@ -588,14 +593,14 @@ function build_images::build_prod_images() { echo return fi - local docker_cache_prod_directive + local docker_prod_directive if [[ "${DOCKER_CACHE}" == "disabled" ]]; then - docker_cache_prod_directive=("--no-cache") + docker_prod_directive=("--no-cache") elif [[ "${DOCKER_CACHE}" == "local" ]]; then - docker_cache_prod_directive=() + docker_prod_directive=() elif [[ "${DOCKER_CACHE}" == "pulled" ]]; then - docker_cache_prod_directive=( + docker_prod_directive=( "--cache-from=${AIRFLOW_PROD_IMAGE}:cache" ) else echo @@ -608,10 +613,19 @@ function build_images::build_prod_images() { # we need to login to docker registry so that we can push cache there build_images::login_to_docker_registry # Cache for prod image contains also build stage for buildx when mode=max specified! 
- docker_cache_prod_directive+=( + docker_prod_directive+=( "--cache-to=type=registry,ref=${AIRFLOW_PROD_IMAGE}:cache,mode=max" + "--push" - "--load" ) + if [[ ${PLATFORM} =~ .*,.* ]]; then + echo + echo "Skip loading docker image on multi-platform build" + echo + else + docker_prod_directive+=( + "--load" + ) + fi fi set +u local additional_dev_args=() @@ -661,14 +675,10 @@ function build_images::build_prod_images() { --build-arg AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/${COMMIT_SHA}/docs/docker-stack/README.md" \ "${additional_dev_args[@]}" \ "${additional_runtime_args[@]}" \ - "${docker_cache_prod_directive[@]}" \ + "${docker_prod_directive[@]}" \ -t "${AIRFLOW_PROD_IMAGE}" \ --target "main" \ . -f Dockerfile - if [[ ${PREPARE_BUILDX_CACHE} == "true" ]]; then - # Push the image as "latest" so that it can be used in Breeze - docker_v push "${AIRFLOW_PROD_IMAGE}" - fi set -u if [[ -n "${IMAGE_TAG=}" ]]; then echo "Tagging additionally image ${AIRFLOW_PROD_IMAGE} with ${IMAGE_TAG}" diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index 11d6be1ea124..b690ed5bfbb2 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -85,7 +85,14 @@ function initialization::create_directories() { function initialization::initialize_base_variables() { # until we have support for ARM images, we set docker default platform to linux/AMD # so that all breeze commands use emulation - export PLATFORM=${PLATFORM:="linux/amd64"} + local machine + if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + machine="arm64" + else + machine="amd64" + fi + + export PLATFORM=${PLATFORM:="linux/${machine}"} # enable buildkit for builds export DOCKER_BUILDKIT=1 diff --git a/scripts/ci/libraries/_kind.sh b/scripts/ci/libraries/_kind.sh index 8468506acba3..fe951771fa20 100644 --- a/scripts/ci/libraries/_kind.sh +++ b/scripts/ci/libraries/_kind.sh @@ -40,8 +40,14 @@ function 
kind::dump_kind_logs() { function kind::make_sure_kubernetes_tools_are_installed() { local system system=$(uname -s | tr '[:upper:]' '[:lower:]') - - local kind_url="https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-${system}-amd64" + local machine + if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + machine="arm64" + else + machine="amd64" + fi + local kind_url + kind_url="https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-${system}-${machine}" mkdir -pv "${BUILD_CACHE_DIR}/kubernetes-bin/${KUBERNETES_VERSION}" if [[ -f "${KIND_BINARY_PATH}" ]]; then local downloaded_kind_version @@ -58,8 +64,8 @@ function kind::make_sure_kubernetes_tools_are_installed() { echo "Kind version ok" echo fi - - local kubectl_url="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/${system}/amd64/kubectl" + local kubectl_url + kubectl_url="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/${system}/${machine}/kubectl" if [[ -f "${KUBECTL_BINARY_PATH}" ]]; then local downloaded_kubectl_version downloaded_kubectl_version="$(${KUBECTL_BINARY_PATH} version --client=true --short | awk '{ print $3 }')" @@ -75,8 +81,8 @@ function kind::make_sure_kubernetes_tools_are_installed() { echo "Kubectl version ok" echo fi - - local helm_url="https://get.helm.sh/helm-${HELM_VERSION}-${system}-amd64.tar.gz" + local helm_url + helm_url="https://get.helm.sh/helm-${HELM_VERSION}-${system}-${machine}.tar.gz" if [[ -f "${HELM_BINARY_PATH}" ]]; then local downloaded_helm_version downloaded_helm_version="$(${HELM_BINARY_PATH} version --template '{{.Version}}')" @@ -86,7 +92,7 @@ function kind::make_sure_kubernetes_tools_are_installed() { echo echo "Downloading Helm version ${HELM_VERSION}" repeats::run_with_retry 4 \ - "curl --connect-timeout 60 --max-time 180 --location '${helm_url}' | tar -xvz -O '${system}-amd64/helm' >'${HELM_BINARY_PATH}'" + "curl --connect-timeout 60 
--max-time 180 --location '${helm_url}' | tar -xvz -O '${system}-${machine}/helm' >'${HELM_BINARY_PATH}'" chmod a+x "${HELM_BINARY_PATH}" else echo "Helm version ok" diff --git a/scripts/ci/libraries/_runs.sh b/scripts/ci/libraries/_runs.sh index 84b31f4ef84a..194dc21e08d6 100644 --- a/scripts/ci/libraries/_runs.sh +++ b/scripts/ci/libraries/_runs.sh @@ -21,10 +21,9 @@ function runs::run_docs() { start_end::group_start "Run build docs" docker_v run "${EXTRA_DOCKER_FLAGS[@]}" -t \ -e "GITHUB_ACTIONS=${GITHUB_ACTIONS="false"}" \ - --entrypoint "/usr/local/bin/dumb-init" \ --pull never \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_docs_build.sh" "${@}" + "/opt/airflow/scripts/in_container/run_docs_build.sh" "${@}" start_end::group_end } @@ -32,10 +31,9 @@ function runs::run_docs() { function runs::run_generate_constraints() { start_end::group_start "Run generate constraints" docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ --pull never \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_generate_constraints.sh" + "/opt/airflow/scripts/in_container/run_generate_constraints.sh" start_end::group_end } @@ -43,12 +41,11 @@ function runs::run_generate_constraints() { function runs::run_prepare_airflow_packages() { start_end::group_start "Run prepare airflow packages" docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ -t \ -v "${AIRFLOW_SOURCES}:/opt/airflow" \ --pull never \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_prepare_airflow_packages.sh" + "/opt/airflow/scripts/in_container/run_prepare_airflow_packages.sh" start_end::group_end } @@ -57,12 +54,11 @@ function runs::run_prepare_airflow_packages() { function runs::run_prepare_provider_packages() { # No group here - groups are added internally docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ -t \ -v 
"${AIRFLOW_SOURCES}:/opt/airflow" \ --pull never \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_prepare_provider_packages.sh" "${@}" + "/opt/airflow/scripts/in_container/run_prepare_provider_packages.sh" "${@}" } # Docker command to generate release notes for provider packages @@ -73,7 +69,6 @@ function runs::run_prepare_provider_documentation() { fi # No group here - groups are added internally docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ "${term_flag}" \ -v "${AIRFLOW_SOURCES}:/opt/airflow" \ -e "NON_INTERACTIVE" \ @@ -81,5 +76,5 @@ function runs::run_prepare_provider_documentation() { -e "GITHUB_TOKEN" \ --pull never \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh" "${@}" + "/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh" "${@}" } diff --git a/scripts/ci/pre_commit/pre_commit_check_license.sh b/scripts/ci/pre_commit/pre_commit_check_license.sh index ec4bea4774a2..909ad691a2e9 100755 --- a/scripts/ci/pre_commit/pre_commit_check_license.sh +++ b/scripts/ci/pre_commit/pre_commit_check_license.sh @@ -64,4 +64,8 @@ function run_check_license() { fi } -run_check_license +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + echo "Skip RAT check on ARM devices util we push multiplatform images" +else + run_check_license +fi diff --git a/scripts/ci/pre_commit/pre_commit_flake8.sh b/scripts/ci/pre_commit/pre_commit_flake8.sh index 759a7b213322..408d593fb750 100755 --- a/scripts/ci/pre_commit/pre_commit_flake8.sh +++ b/scripts/ci/pre_commit/pre_commit_flake8.sh @@ -25,14 +25,10 @@ export PRINT_INFO_FROM_SCRIPTS="false" function run_flake8() { if [[ "${#@}" == "0" ]]; then docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ - "${AIRFLOW_CI_IMAGE}" \ - "--" "/opt/airflow/scripts/in_container/run_flake8.sh" + "${AIRFLOW_CI_IMAGE}" 
"/opt/airflow/scripts/in_container/run_flake8.sh" else docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ - "${AIRFLOW_CI_IMAGE}" \ - "--" "/opt/airflow/scripts/in_container/run_flake8.sh" "${@}" + "${AIRFLOW_CI_IMAGE}" "/opt/airflow/scripts/in_container/run_flake8.sh" "${@}" fi } diff --git a/scripts/ci/pre_commit/pre_commit_lint_dockerfile.sh b/scripts/ci/pre_commit/pre_commit_lint_dockerfile.sh index 1db6b2f48c41..931421146bc8 100755 --- a/scripts/ci/pre_commit/pre_commit_lint_dockerfile.sh +++ b/scripts/ci/pre_commit/pre_commit_lint_dockerfile.sh @@ -52,4 +52,9 @@ function run_docker_lint() { fi } -run_docker_lint "$@" +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # See https://github.com/hadolint/hadolint/issues/411 + echo "Skip Hadolint check on ARM devices as they do not provide multiplatform images" +else + run_docker_lint "$@" +fi diff --git a/scripts/ci/pre_commit/pre_commit_migration_reference.sh b/scripts/ci/pre_commit/pre_commit_migration_reference.sh index 59a7989218aa..2d02fc3584a3 100755 --- a/scripts/ci/pre_commit/pre_commit_migration_reference.sh +++ b/scripts/ci/pre_commit/pre_commit_migration_reference.sh @@ -24,14 +24,12 @@ export PRINT_INFO_FROM_SCRIPTS="false" function migration_reference() { if [[ "${#@}" == "0" ]]; then docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ "${AIRFLOW_CI_IMAGE}" \ - "--" "/opt/airflow/scripts/in_container/run_migration_reference.sh" + "/opt/airflow/scripts/in_container/run_migration_reference.sh" else docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ "${AIRFLOW_CI_IMAGE}" \ - "--" "/opt/airflow/scripts/in_container/run_migration_reference.sh" "${@}" + "/opt/airflow/scripts/in_container/run_migration_reference.sh" "${@}" fi } diff --git a/scripts/ci/pre_commit/pre_commit_mypy.sh b/scripts/ci/pre_commit/pre_commit_mypy.sh index f49fc02a778d..60d1671fae03 100755 --- 
a/scripts/ci/pre_commit/pre_commit_mypy.sh +++ b/scripts/ci/pre_commit/pre_commit_mypy.sh @@ -31,10 +31,9 @@ function run_mypy() { fi docker_v run "${EXTRA_DOCKER_FLAGS[@]}" -t \ - --entrypoint "/usr/local/bin/dumb-init" \ "-v" "${AIRFLOW_SOURCES}/.mypy_cache:/opt/airflow/.mypy_cache" \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_mypy.sh" "${files[@]}" + "/opt/airflow/scripts/in_container/run_mypy.sh" "${files[@]}" } build_images::prepare_ci_build diff --git a/scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh b/scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh index b8228851f4dd..cf9c3832dd14 100755 --- a/scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh +++ b/scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh @@ -30,7 +30,6 @@ fi function run_test_package_import_all_classes() { # Groups are added internally docker_v run "${EXTRA_DOCKER_FLAGS[@]}" \ - --entrypoint "/usr/local/bin/dumb-init" \ -t \ -v "${AIRFLOW_SOURCES}/setup.py:/airflow_sources/setup.py:cached" \ -v "${AIRFLOW_SOURCES}/setup.cfg:/airflow_sources/setup.cfg:cached" \ @@ -39,7 +38,7 @@ function run_test_package_import_all_classes() { -v "${AIRFLOW_SOURCES}/scripts/in_container:/opt/airflow/scripts/in_container:cached" \ -v "${AIRFLOW_SOURCES}/dev/import_all_classes.py:/opt/airflow/dev/import_all_classes.py:cached" \ "${AIRFLOW_CI_IMAGE_WITH_TAG}" \ - "--" "/opt/airflow/scripts/in_container/run_install_and_test_provider_packages.sh" + "/opt/airflow/scripts/in_container/run_install_and_test_provider_packages.sh" } build_images::prepare_ci_build diff --git a/scripts/docker/install_mssql.sh b/scripts/docker/install_mssql.sh index 024481d63569..f6c01868edad 100644 --- a/scripts/docker/install_mssql.sh +++ b/scripts/docker/install_mssql.sh @@ -64,4 +64,9 @@ function install_mssql_client() { apt-get clean && rm -rf /var/lib/apt/lists/* } +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" 
]]; then + # disable MSSQL for ARM64 + INSTALL_MSSQL_CLIENT="false" +fi + install_mssql_client "${@}" diff --git a/scripts/docker/install_mysql.sh b/scripts/docker/install_mysql.sh index de2ecab0a05c..790a1daf32b4 100644 --- a/scripts/docker/install_mysql.sh +++ b/scripts/docker/install_mysql.sh @@ -67,6 +67,11 @@ install_mysql_client() { apt-get clean && rm -rf /var/lib/apt/lists/* } +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # disable MYSQL for ARM64 + INSTALL_MYSQL_CLIENT="false" +fi + # Install MySQL client from Oracle repositories (Debian installs mariadb) # But only if it is not disabled if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then diff --git a/scripts/docker/install_pipx_tools.sh b/scripts/docker/install_pipx_tools.sh index 20a002bb9e52..a40903611c32 100644 --- a/scripts/docker/install_pipx_tools.sh +++ b/scripts/docker/install_pipx_tools.sh @@ -24,13 +24,16 @@ function install_pipx_tools() { echo # Make sure PIPX is installed in latest version pip install --upgrade pipx - # Install all the tools we need available in command line but without impacting the current environment - pipx install mssql-cli + if [[ $(uname -m) != "aarch64" ]]; then + # Do not install mssql-cli for ARM + # Install all the tools we need available in command line but without impacting the current environment + pipx install mssql-cli - # Unfortunately mssql-cli installed by `pipx` does not work out of the box because it uses - # its own execution bash script which is not compliant with the auto-activation of - # pipx venvs - we need to manually patch Python executable in the script to fix it: ¯\_(ツ)_/¯ - sed "s/python /\/root\/\.local\/pipx\/venvs\/mssql-cli\/bin\/python /" -i /root/.local/bin/mssql-cli + # Unfortunately mssql-cli installed by `pipx` does not work out of the box because it uses + # its own execution bash script which is not compliant with the auto-activation of + # pipx venvs - we need to manually patch Python executable in the 
script to fix it: ¯\_(ツ)_/¯ + sed "s/python /\/root\/\.local\/pipx\/venvs\/mssql-cli\/bin\/python /" -i /root/.local/bin/mssql-cli + fi } common::get_colors diff --git a/scripts/in_container/bin/install_kubectl.sh b/scripts/in_container/bin/install_kubectl.sh index a74f572db52a..15087e79d96a 100755 --- a/scripts/in_container/bin/install_kubectl.sh +++ b/scripts/in_container/bin/install_kubectl.sh @@ -33,7 +33,8 @@ if command -v kubectl; then fi KUBECTL_VERSION="$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)" -DOWNLOAD_URL="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" +KUBECTL_PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) +DOWNLOAD_URL="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/${KUBECTL_PLATFORM}/kubectl" if [[ -e ${BIN_PATH} ]]; then echo "The binary file (${BIN_PATH}) already exists. This may mean kubectl is already installed." 
diff --git a/scripts/in_container/bin/install_terraform.sh b/scripts/in_container/bin/install_terraform.sh index b6358e472cf5..f96d4cef4972 100755 --- a/scripts/in_container/bin/install_terraform.sh +++ b/scripts/in_container/bin/install_terraform.sh @@ -34,7 +34,8 @@ fi TERRAFORM_VERSION="0.14.4" TERRAFORM_BASE_URL="https://releases.hashicorp.com/terraform" -TERRAFORM_ZIP="terraform_${TERRAFORM_VERSION}_$(uname | tr '[:upper:]' '[:lower:]')_amd64.zip" +TERRAFORM_PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) +TERRAFORM_ZIP="terraform_${TERRAFORM_VERSION}_$(uname | tr '[:upper:]' '[:lower:]')_${TERRAFORM_PLATFORM}.zip" DOWNLOAD_URL="${TERRAFORM_BASE_URL}/${TERRAFORM_VERSION}/${TERRAFORM_ZIP}" TMP_DIR="$(mktemp -d)" diff --git a/scripts/in_container/entrypoint_ci.sh b/scripts/in_container/entrypoint_ci.sh index 76f2405d0734..f4eba9a52f51 100755 --- a/scripts/in_container/entrypoint_ci.sh +++ b/scripts/in_container/entrypoint_ci.sh @@ -22,6 +22,23 @@ fi # shellcheck source=scripts/in_container/_in_container_script_init.sh . /opt/airflow/scripts/in_container/_in_container_script_init.sh +# This one is to workaround https://github.com/apache/airflow/issues/17546 +# issue with /usr/lib/<ARCH>-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block +# We do not yet have a more "correct" solution to the problem but in order to avoid raising new issues +# by users of the image, we implement the workaround now. 
+# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any +# binary started and a little memory used for Heap allocated by initialization of libstdc++ +# This overhead is not happening for binaries that already link dynamically libstdc++ +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + if [[ ${BACKEND} == "mysql" || ${BACKEND} == "mssql" ]]; then + echo "${COLOR_RED}ARM platform is not supported for ${BACKEND} backend. Exiting.${COLOR_RESET}" + exit 1 + fi +fi + # Add "other" and "group" write permission to the tmp folder # Note that it will also change permissions in the /tmp folder on the host # but this is necessary to enable some of our CLI tools to work without errors diff --git a/scripts/in_container/prod/entrypoint_prod.sh b/scripts/in_container/prod/entrypoint_prod.sh index 6699f33808ce..4ed09d5fb3ee 100755 --- a/scripts/in_container/prod/entrypoint_prod.sh +++ b/scripts/in_container/prod/entrypoint_prod.sh @@ -20,6 +20,16 @@ AIRFLOW_COMMAND="${1:-}" set -euo pipefail +# This one is to workaround https://github.com/apache/airflow/issues/17546 +# issue with /usr/lib/<ARCH>-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block +# We do not yet have a more "correct" solution to the problem but in order to avoid raising new issues +# by users of the prod image, we implement the workaround now. 
+# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any +# binary started and a little memory used for Heap allocated by initialization of libstdc++ +# This overhead is not happening for binaries that already link dynamically libstdc++ +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + function run_check_with_retries { local cmd cmd="${1}" diff --git a/scripts/in_container/run_prepare_provider_documentation.sh b/scripts/in_container/run_prepare_provider_documentation.sh index fb2b61e9b3a6..68e06dfe9624 100755 --- a/scripts/in_container/run_prepare_provider_documentation.sh +++ b/scripts/in_container/run_prepare_provider_documentation.sh @@ -40,7 +40,7 @@ function run_prepare_documentation() { # There is a separate group created in logs for each provider package python3 "${PROVIDER_PACKAGES_DIR}/prepare_provider_packages.py" \ update-package-documentation \ - --version-suffix "${VERSION_SUFFIX_FOR_PYPI}" \ + --version-suffix "${VERSION_SUFFIX_FOR_PYPI=}" \ --no-git-update \ "${OPTIONAL_VERBOSE_FLAG[@]}" \ "${OPTIONAL_RELEASE_VERSION_ARGUMENT[@]}" \ diff --git a/setup.py b/setup.py index 3ce9ae73114e..ab4b5018ea24 100644 --- a/setup.py +++ b/setup.py @@ -420,7 +420,7 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version 'ldap3>=2.5.1', 'python-ldap', ] -leveldb = ['plyvel'] +leveldb = ['plyvel; platform_machine != "aarch64"'] mongo = [ 'dnspython>=1.13.0', # pymongo 4.0.0 removes connection option `ssl_cert_reqs` which is used in providers-mongo/2.2.0 @@ -428,11 +428,11 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version 'pymongo>=3.6.0,<4.0.0', ] mssql = [ - 'pymssql>=2.1.5', + 'pymssql>=2.1.5; platform_machine != "aarch64"', ] mysql = [ - 'mysql-connector-python>=8.0.11', - 'mysqlclient>=1.3.6', + 'mysql-connector-python>=8.0.11; platform_machine != "aarch64"', + 'mysqlclient>=1.3.6; platform_machine != "aarch64"', ] neo4j = 
['neo4j>=4.2.1'] odbc = [