kubeflow · google-oss-prow · Jan 17, 2023 · Jan 11, 2023 · Jan 16, 2023 · tenzen-y
diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile.gpu b/examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile.gpu
@@ -1,6 +1,7 @@
-# Pytorch=1.11.0, cuda=11.6.0
-# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
-FROM nvcr.io/nvidia/pytorch:22.02-py3
+# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
+# PyTorch=1.13.0, cuda=11.8.0
+# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
+FROM nvcr.io/nvidia/pytorch:22.11-py3
 
 ENV TARGET_DIR /opt/darts-cnn-cifar10
 

diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/requirements.txt b/examples/v1beta1/trial-images/darts-cnn-cifar10/requirements.txt
@@ -1,3 +1,3 @@
-torch==1.11.0
-torchvision==0.12.0
+torch==1.13.1
+torchvision==0.14.1
 Pillow>=9.1.1
diff --git a/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu b/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu
@@ -1,6 +1,7 @@
-# Pytorch=1.11.0, cuda=11.6.0
-# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
-FROM nvcr.io/nvidia/pytorch:22.02-py3
+# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
+# PyTorch=1.13.0, cuda=11.8.0
+# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
+FROM nvcr.io/nvidia/pytorch:22.11-py3
 
 ADD examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist
 

diff --git a/examples/v1beta1/trial-images/pytorch-mnist/requirements.txt b/examples/v1beta1/trial-images/pytorch-mnist/requirements.txt
@@ -1,4 +1,4 @@
 cloudml-hypertune==0.1.0.dev6
-torch==1.11.0
-torchvision==0.12.0
+torch==1.13.1
+torchvision==0.14.1
 Pillow>=9.1.1
diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
@@ -20,7 +20,10 @@
 set -o errexit
 set -o pipefail
 set -o nounset
-cd "$(dirname "$0")"
+
+pushd .
+cd "$(dirname "$0")/../../../../.."
+trap popd EXIT
 
 TRIAL_IMAGES=${1:-""}
 EXPERIMENTS=${2:-""}
@@ -48,14 +51,7 @@ _build_containers() {
   done
 
   echo -e "\nBuilding $CONTAINER_NAME image with $DOCKERFILE...\n"
-  docker buildx build --platform "$(uname -m)" --load -t "$REGISTRY/$CONTAINER_NAME:$TAG" -f "../../../../../$DOCKERFILE" ../../../../../
-}
-
-_load_minikube_cluster() {
-  CONTAINER_NAME=${1:-"katib-controller"}
-
-  echo -e "\n\nLoading $CONTAINER_NAME image...\n\n"
-  minikube image load "$REGISTRY/$CONTAINER_NAME:$TAG"
+  DOCKER_BUILDKIT=1 minikube image build --build-opt platform=linux/amd64 --all -t "$REGISTRY/$CONTAINER_NAME:$TAG" -f "$DOCKERFILE" .
 }
 
 _install_tools() {
@@ -66,11 +62,6 @@ _install_tools() {
   fi
 }
 
-cleanup_build_cache() {
-  echo -e "\nCleanup Build Cache...\n"
-  docker builder prune
-}
-
 run() {
   CONTAINER_NAME=${1:-"katib-controller"}
   DOCKERFILE=${2:-"$CMD_PREFIX/katib-controller/$VERSION/Dockerfile"}
@@ -85,10 +76,10 @@ run() {
     # Search for Suggestion Images required for Trial.
     for exp_name in "${EXPERIMENT_ARRAY[@]}"; do
 
-      exp_path=$(find ../../../../../examples/v1beta1 -name "${exp_name}.yaml")
+      exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
       algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")"
 
-      suggestion_image_name="$(yq eval '.data.suggestion' ../../../../../manifests/v1beta1/components/controller/katib-config.yaml |
+      suggestion_image_name="$(yq eval '.data.suggestion' manifests/v1beta1/components/controller/katib-config.yaml |
         algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
       suggestion_name="$(basename "$suggestion_image_name")"
 
@@ -99,7 +90,6 @@ run() {
     for s in "${suggestions[@]}"; do
       if [ "$s" == "$CONTAINER_NAME" ]; then
         _build_containers "$CONTAINER_NAME" "$DOCKERFILE"
-        _load_minikube_cluster "$CONTAINER_NAME"
         break
       fi
     done
@@ -112,10 +102,10 @@ run() {
     # Search for EarlyStopping Images required for Trial.
     for exp_name in "${EXPERIMENT_ARRAY[@]}"; do
 
-      exp_path=$(find ../../../../../examples/v1beta1 -name "${exp_name}.yaml")
+      exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
       algorithm_name="$(yq eval '.spec.earlyStopping.algorithmName' "$exp_path")"
 
-      earlystopping_image_name="$(yq eval '.data.early-stopping' ../../../../../manifests/v1beta1/components/controller/katib-config.yaml |
+      earlystopping_image_name="$(yq eval '.data.early-stopping' manifests/v1beta1/components/controller/katib-config.yaml |
         algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
       earlystopping_name="$(basename "$earlystopping_image_name")"
 
@@ -126,15 +116,13 @@ run() {
     for e in "${earlystoppings[@]}"; do
       if [ "$e" == "$CONTAINER_NAME" ]; then
         _build_containers "$CONTAINER_NAME" "$DOCKERFILE"
-        _load_minikube_cluster "$CONTAINER_NAME"
         break
       fi
     done
 
   # Others
   else
     _build_containers "$CONTAINER_NAME" "$DOCKERFILE"
-    _load_minikube_cluster "$CONTAINER_NAME"
   fi
 }
 
@@ -153,7 +141,6 @@ fi
 run "cert-generator" "$CMD_PREFIX/cert-generator/$VERSION/Dockerfile"
 run "file-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/file-metricscollector/Dockerfile"
 run "tfevent-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/tfevent-metricscollector/Dockerfile"
-cleanup_build_cache
 
 # Suggestion images
 echo -e "\nBuilding suggestion images..."
@@ -165,18 +152,18 @@ run "suggestion-optuna" "$CMD_PREFIX/suggestion/optuna/$VERSION/Dockerfile"
 run "suggestion-pbt" "$CMD_PREFIX/suggestion/pbt/$VERSION/Dockerfile"
 run "suggestion-enas" "$CMD_PREFIX/suggestion/nas/enas/$VERSION/Dockerfile"
 run "suggestion-darts" "$CMD_PREFIX/suggestion/nas/darts/$VERSION/Dockerfile"
-cleanup_build_cache
 
 # Early stopping images
 echo -e "\nBuilding early stopping images...\n"
 run "earlystopping-medianstop" "$CMD_PREFIX/earlystopping/medianstop/$VERSION/Dockerfile"
-cleanup_build_cache
 
 # Training container images
 echo -e "\nBuilding training container images..."
 for name in "${TRIAL_IMAGE_ARRAY[@]}"; do
   run "$name" "examples/$VERSION/trial-images/$name/Dockerfile"
 done
-cleanup_build_cache
+
+echo -e "\nCleanup Build Cache...\n"
+docker buildx prune -f
 
 echo -e "\nAll Katib images with ${TAG} tag have been built successfully!\n"