Skip to content

Commit

Permalink
Upgrade nightly wheels to rocm5.5 (#1407)
Browse files Browse the repository at this point in the history
* Add MIOpen db files to wheel

* Update magma commits for various branches to include header path updates

* Add ROCm5.5 support with Navi31-tuned MIOpen branch

* Upgrade nightly wheels to rocm5.5

* Update build_docker.sh for gfx1100

* Update build_docker.sh for gfx1100

---------

Co-authored-by: Jithun Nair <[email protected]>
Co-authored-by: Jithun Nair <[email protected]>
  • Loading branch information
3 people authored May 23, 2023
1 parent e795fee commit 3237101
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-libtorch-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
rocm_version: ["5.3", "5.4.2"]
rocm_version: ["5.4.2", "5.5"]
env:
GPU_ARCH_TYPE: rocm
GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-manywheel-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
rocm_version: ["5.3", "5.4.2"]
rocm_version: ["5.4.2", "5.5"]
env:
GPU_ARCH_TYPE: rocm
GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
Expand Down
4 changes: 3 additions & 1 deletion common/install_miopen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ MIOPEN_CMAKE_COMMON_FLAGS="
-DMIOPEN_BUILD_DRIVER=OFF
"
# Pull the MIOpen repo and set the -DMIOPEN_EMBED_DB CMake flag based on ROCm version
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
if [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then
Expand Down
7 changes: 5 additions & 2 deletions common/install_rocm_magma.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ MKLROOT=${MKLROOT:-/opt/intel}
# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma
# fix for magma_queue memory leak issue
git checkout c62d700d880c7283b33fb1d615d62fc9c7f7ca21
if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then
git checkout magma_ctrl_launch_bounds
else
git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f
fi
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
# TODO (1)
Expand Down
8 changes: 4 additions & 4 deletions libtorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,13 @@ RUN apt-get update -y && \
apt-get install python -y && \
apt-get clean

FROM rocm as rocm5.3
RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh
FROM rocm as rocm5.4.2
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

FROM rocm as rocm5.4.2
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
FROM rocm as rocm5.5
RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

Expand Down
2 changes: 1 addition & 1 deletion libtorch/build_all_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8 11.7; do
GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh"
done

for rocm_version in 5.3 5.4.2; do
for rocm_version in 5.4.2 5.5; do
GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh"
done
5 changes: 1 addition & 4 deletions libtorch/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,14 @@ case ${GPU_ARCH_TYPE} in
BASE_TARGET=rocm${GPU_ARCH_VERSION}
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
if [[ $ROCM_VERSION_INT -ge 40300 ]]; then
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030"
fi
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
;;
*)
Expand Down
2 changes: 1 addition & 1 deletion manywheel/build_all_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ for cuda_version in 11.8 11.7; do
MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh"
done

for rocm_version in 5.3 5.4.2; do
for rocm_version in 5.4.2 5.5; do
GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
done
5 changes: 1 addition & 4 deletions manywheel/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,14 @@ case ${GPU_ARCH_TYPE} in
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
if [[ $ROCM_VERSION_INT -ge 40300 ]]; then
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030"
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
;;
*)
Expand Down
7 changes: 7 additions & 0 deletions manywheel/build_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)

# MIOpen db files (arch-specific entries from share/miopen/db)
MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db
MIOPEN_SHARE_DST=share/miopen/db
MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH))

# ROCm library files
ROCM_SO_PATHS=()
for lib in "${ROCM_SO_FILES[@]}"
Expand Down Expand Up @@ -174,11 +179,13 @@ DEPS_SONAME=(

DEPS_AUX_SRCLIST=(
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}"
"${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/}"
"/opt/amdgpu/share/libdrm/amdgpu.ids"
)

DEPS_AUX_DSTLIST=(
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}"
"${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/}"
"share/libdrm/amdgpu.ids"
)

Expand Down

0 comments on commit 3237101

Please sign in to comment.