From 8ec8a75705a6da53787752d679c209d1ada94c0a Mon Sep 17 00:00:00 2001 From: "Damon P. Cortesi" Date: Mon, 17 Oct 2022 14:00:10 -0700 Subject: [PATCH] Fixes #4 - Install gcc/python3-devel for non amd64 arch - Split out pip install so later pip installs can benefit --- templates/pyspark.dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/templates/pyspark.dockerfile b/templates/pyspark.dockerfile index 66971d8..8de5d73 100644 --- a/templates/pyspark.dockerfile +++ b/templates/pyspark.dockerfile @@ -4,6 +4,7 @@ ARG RELEASE="emr-6.6.0" ARG RELEASE_TAG="latest" ARG REGION="us-west-2" ARG EMR_ACCOUNT_ID="895885662937" +ARG TARGETARCH FROM ${EMR_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/spark/${RELEASE}:${RELEASE_TAG} @@ -33,8 +34,14 @@ RUN yum install -y git unzip && \ ./aws/install && \ rm -rf aws awscliv2.zip +# ipykernel depends on psutil, which does not publish wheels for aarch64 +RUN if [ "$TARGETARCH" != "amd64" ]; then yum install -y gcc python3-devel; fi + +# Upgrade pip first +RUN python3 -m pip install -U pip + # Enable Jupyter notebooks -RUN python3 -m pip install -U pip ipykernel +RUN python3 -m pip install ipykernel # Switch back to the default user USER hadoop:hadoop