Skip to content

Commit

Permalink
[aarch64] Add CUDA 12.4 build script for ARM wheel (#1775)
Browse files Browse the repository at this point in the history
Add cuda_aarch64 ARM wheel build script with CUDA 12.4.
Reference #1302.

---------

Co-authored-by: Nikita Shulga <[email protected]>
  • Loading branch information
tinglvv and malfet authored Apr 19, 2024
1 parent 4e10974 commit a79e1ce
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 42 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/build-manywheel-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ on:
- .github/workflows/build-manywheel-images.yml
- manywheel/Dockerfile
- manywheel/Dockerfile_aarch64
- manywheel/Dockerfile_cuda_aarch64
- manywheel/Dockerfile_cxx11-abi
- manywheel/build_docker.sh
- 'common/*'
Expand All @@ -21,6 +22,7 @@ on:
- .github/workflows/build-manywheel-images.yml
- manywheel/Dockerfile
- manywheel/Dockerfile_aarch64
- manywheel/Dockerfile_cuda_aarch64
- manywheel/Dockerfile_cxx11-abi
- 'common/*'
- manywheel/build_docker.sh
Expand Down Expand Up @@ -54,6 +56,25 @@ jobs:
- name: Build Docker Image
run: |
manywheel/build_docker.sh
build-docker-cuda-aarch64:
runs-on: linux.arm64.2xlarge
strategy:
matrix:
cuda_version: ["12.4"]
env:
GPU_ARCH_TYPE: cuda-aarch64
GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v3
- name: Authenticate if WITH_PUSH
run: |
if [[ "${WITH_PUSH}" == true ]]; then
echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
fi
- name: Build Docker Image
run: |
manywheel/build_docker.sh
build-docker-rocm:
runs-on: linux.12xlarge
strategy:
Expand Down
8 changes: 7 additions & 1 deletion aarch64_linux/aarch64_ci_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ cd /
git config --global --add safe.directory /pytorch
pip install -r /pytorch/requirements.txt
pip install auditwheel
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
if [ -n "$GPU_ARCH_VERSION" ]; then
echo "BASE_CUDA_VERSION is set to: $GPU_ARCH_VERSION"
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
else
echo "BASE_CUDA_VERSION is not set."
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
fi
180 changes: 139 additions & 41 deletions aarch64_linux/aarch64_wheel_ci_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,103 +9,201 @@


def list_dir(path: str) -> List[str]:
''''
"""'
Helper for getting paths for Python
'''
"""
return check_output(["ls", "-1", path]).decode().split("\n")


def build_ArmComputeLibrary() -> None:
'''
"""
Using ArmComputeLibrary for aarch64 PyTorch
'''
print('Building Arm Compute Library')
acl_build_flags=["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
"arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]
acl_install_dir="/acl"
acl_checkout_dir="ComputeLibrary"
"""
print("Building Arm Compute Library")
acl_build_flags = [
"debug=0",
"neon=1",
"opencl=0",
"os=linux",
"openmp=1",
"cppthreads=0",
"arch=armv8a",
"multi_isa=1",
"fixed_format_kernels=1",
"build=native",
]
acl_install_dir = "/acl"
acl_checkout_dir = "ComputeLibrary"
os.makedirs(acl_install_dir)
check_call(["git", "clone", "https://github.com/ARM-software/ComputeLibrary.git", "-b", "v23.08",
"--depth", "1", "--shallow-submodules"])
check_call(["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + acl_build_flags,
cwd=acl_checkout_dir)
check_call(
[
"git",
"clone",
"https://github.com/ARM-software/ComputeLibrary.git",
"-b",
"v23.08",
"--depth",
"1",
"--shallow-submodules",
]
)
check_call(
["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"]
+ acl_build_flags,
cwd=acl_checkout_dir,
)
for d in ["arm_compute", "include", "utils", "support", "src"]:
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")


def update_wheel(wheel_path) -> None:
"""
Update the cuda wheel libraries
"""
folder = os.path.dirname(wheel_path)
wheelname = os.path.basename(wheel_path)
os.mkdir(f"{folder}/tmp")
os.system(f"unzip {wheel_path} -d {folder}/tmp")
libs_to_copy = [
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
"/usr/local/cuda/lib64/libcudnn.so.8",
"/usr/local/cuda/lib64/libcublas.so.12",
"/usr/local/cuda/lib64/libcublasLt.so.12",
"/usr/local/cuda/lib64/libcudart.so.12",
"/usr/local/cuda/lib64/libcufft.so.11",
"/usr/local/cuda/lib64/libcusparse.so.12",
"/usr/local/cuda/lib64/libcusparseLt.so.0",
"/usr/local/cuda/lib64/libcusolver.so.11",
"/usr/local/cuda/lib64/libcurand.so.10",
"/usr/local/cuda/lib64/libnvToolsExt.so.1",
"/usr/local/cuda/lib64/libnvJitLink.so.12",
"/usr/local/cuda/lib64/libnvrtc.so.12",
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
"/usr/local/cuda/lib64/libcudnn_adv_infer.so.8",
"/usr/local/cuda/lib64/libcudnn_adv_train.so.8",
"/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8",
"/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
"/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
"/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
"/opt/conda/envs/aarch64_env/lib/libopenblas.so.0",
"/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
"/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
"/acl/build/libarm_compute.so",
"/acl/build/libarm_compute_graph.so",
"/acl/build/libarm_compute_core.so",
]
# Copy libraries to unzipped_folder/a/lib
for lib_path in libs_to_copy:
lib_name = os.path.basename(lib_path)
shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
os.system(
f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so"
)
os.mkdir(f"{folder}/cuda_wheel")
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
shutil.move(
f"{folder}/cuda_wheel/{wheelname}",
f"/dist/{wheelname}",
copy_function=shutil.copy2,
)
os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")


def complete_wheel(folder: str) -> str:
'''
"""
Complete wheel build and put in artifact location
'''
"""
wheel_name = list_dir(f"/{folder}/dist")[0]

if "pytorch" in folder:
if "pytorch" in folder and not enable_cuda:
print("Repairing Wheel with AuditWheel")
check_call(["auditwheel","repair", f"dist/{wheel_name}"], cwd=folder)
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0]

print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist")
os.rename(f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}")
os.rename(
f"/{folder}/wheelhouse/{repaired_wheel_name}",
f"/{folder}/dist/{repaired_wheel_name}",
)
else:
repaired_wheel_name = wheel_name

print(f"Copying {repaired_wheel_name} to artfacts")
shutil.copy2(f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}")
print(f"Copying {repaired_wheel_name} to artifacts")
shutil.copy2(
f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}"
)

return repaired_wheel_name


def parse_arguments():
'''
"""
Parse inline arguments
'''
"""
from argparse import ArgumentParser

parser = ArgumentParser("AARCH64 wheels python CD")
parser.add_argument("--debug", action="store_true")
parser.add_argument("--build-only", action="store_true")
parser.add_argument("--test-only", type=str)
parser.add_argument("--enable-mkldnn", action="store_true")
parser.add_argument("--enable-cuda", action="store_true")
return parser.parse_args()


if __name__ == '__main__':
'''
if __name__ == "__main__":
"""
Entry Point
'''
"""
args = parse_arguments()
enable_mkldnn = args.enable_mkldnn
repo = Repository('/pytorch')
enable_cuda = args.enable_cuda
repo = Repository("/pytorch")
branch = repo.head.name
if branch == 'HEAD':
branch = 'master'

if branch == "HEAD":
branch = "master"

print('Building PyTorch wheel')
print("Building PyTorch wheel")
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
os.system("python setup.py clean")

override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
if override_package_version is not None:
version = override_package_version
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
elif branch in ['nightly', 'master']:
build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
build_vars += (
f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
)
elif branch in ["nightly", "master"]:
build_date = (
check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
.decode()
.replace("-", "")
)
version = (
check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
)
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
elif branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

if enable_mkldnn:
build_ArmComputeLibrary()
print("build pytorch with mkldnn+acl backend")
build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \
"ACL_ROOT_DIR=/acl " \
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \
"ACL_INCLUDE_DIR=/acl/build " \
"ACL_LIBRARY=/acl/build "
build_vars += (
"USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
"ACL_ROOT_DIR=/acl "
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
"ACL_INCLUDE_DIR=/acl/build "
"ACL_LIBRARY=/acl/build "
)
else:
print("build pytorch without mkldnn backend")

os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
pytorch_wheel_name = complete_wheel("pytorch")
print(f"Build Compelete. Created {pytorch_wheel_name}..")
if enable_cuda:
print("Updating Cuda Dependency")
filename = os.listdir("/pytorch/dist/")
wheel_path = f"/pytorch/dist/{filename[0]}"
update_wheel(wheel_path)
pytorch_wheel_name = complete_wheel("/pytorch/")
print(f"Build Complete. Created {pytorch_wheel_name}..")
90 changes: 90 additions & 0 deletions common/install_cuda_aarch64.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash

set -ex

function install_cusparselt_052 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run
./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent
rm -f cuda_12.4.0_550.54.14_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl

install_cusparselt_052

ldconfig
}

function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"

export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
12.4) install_124; prune_124
;;
*) echo "bad argument $1"; exit 1
;;
esac
shift
done
Loading

0 comments on commit a79e1ce

Please sign in to comment.