Skip to content

Commit

Permalink
RHOAIENG-10783: fix(rocm): de-vendor the bundled rocm libraries from …
Browse files Browse the repository at this point in the history
…pytorch (#652)

* RHOAIENG-9853: fix(rocm): de-vendor the bundled rocm libraries from pytorch

Use the de-vendor script from instructlab to replace the ROCm libraries bundled inside PyTorch with symlinks to the system ROCm installation.
Removing the duplicate copy of the ROCm libs makes the image significantly smaller.
Script lives at https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh

* add a selftest
  • Loading branch information
jiridanek authored Aug 6, 2024
1 parent b5322d8 commit ec45de1
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 0 deletions.
5 changes: 5 additions & 0 deletions jupyter/rocm/pytorch/ubi9-python-3.9/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ LABEL name="odh-notebook-jupyter-rocm-pytorch-ubi9-python-3.9" \

# Install Python packages and Jupyterlab extensions from Pipfile.lock
COPY Pipfile.lock ./
# Copy utility script
COPY de-vendor-torch.sh ./

RUN echo "Installing softwares and packages" && micropipenv install && rm -f ./Pipfile.lock && \
# De-vendor the ROCm libs that are embedded in Pytorch \
./de-vendor-torch.sh && \
rm ./de-vendor-torch.sh && \
# Replace Notebook's launcher, "(ipykernel)" with Python's version 3.x.y
sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json && \
# Disable announcement plugin of jupyterlab
Expand Down
41 changes: 41 additions & 0 deletions jupyter/rocm/pytorch/ubi9-python-3.9/de-vendor-torch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh
set -ex
# Replace PyTorch's vendored shared libraries with system libraries.
# The script assumes that PyTorch is built with the same ROCm ABI as the
# system installation of ROCm.
#
# Every link target is verified before it is linked: `ln -sf` succeeds even
# when the target does not exist, which would silently swap a working
# vendored library for a dangling symlink. Failing here surfaces a ROCm
# soname/version mismatch at image build time.
#
# NOTE: tests/test_main.py asserts that the jupyter/ and runtimes/ copies of
# this script are identical; apply any change to both files.

# Source: https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh

PYTHON=python3.9
ROCMLIB=/opt/rocm/lib
TORCHLIB=/opt/app-root/lib/${PYTHON}/site-packages/torch/lib

# require_target PATH
# Abort the script (and therefore the image build) if PATH does not exist.
require_target() {
    if [ ! -e "$1" ]; then
        echo "de-vendor-torch.sh: system library not found: $1" >&2
        exit 1
    fi
}

# link_lib TARGET LINK_NAME
# Replace the file at LINK_NAME with a symlink to the existing TARGET.
link_lib() {
    require_target "$1"
    ln -sf "$1" "$2"
}

# link_dir TARGET LINK_NAME
# Replace the directory tree at LINK_NAME with a symlink to the existing
# TARGET. The target is checked first so nothing is deleted when it is missing.
link_dir() {
    require_target "$1"
    rm -rf "$2"
    ln -sf "$1" "$2"
}

# libdrm comes from the base OS rather than from the ROCm tree.
link_lib /usr/lib64/libdrm.so.2 "${TORCHLIB}/libdrm.so"
link_lib /usr/lib64/libdrm_amdgpu.so.1 "${TORCHLIB}/libdrm_amdgpu.so"

# ROCm shared libraries; the versioned sonames must match the installed ROCm.
link_lib "${ROCMLIB}/libamd_comgr.so.2" "${TORCHLIB}/libamd_comgr.so"
link_lib "${ROCMLIB}/libamdhip64.so.6" "${TORCHLIB}/libamdhip64.so"
link_lib "${ROCMLIB}/libhipblaslt.so.0" "${TORCHLIB}/libhipblaslt.so"
link_lib "${ROCMLIB}/libhipblas.so.2" "${TORCHLIB}/libhipblas.so"
link_lib "${ROCMLIB}/libhipfft.so.0" "${TORCHLIB}/libhipfft.so"
link_lib "${ROCMLIB}/libhiprand.so.1" "${TORCHLIB}/libhiprand.so"
link_lib "${ROCMLIB}/libhiprtc.so.6" "${TORCHLIB}/libhiprtc.so"
link_lib "${ROCMLIB}/libhipsolver.so.0" "${TORCHLIB}/libhipsolver.so"
link_lib "${ROCMLIB}/libhipsparse.so.1" "${TORCHLIB}/libhipsparse.so"
link_lib "${ROCMLIB}/libhsa-runtime64.so.1" "${TORCHLIB}/libhsa-runtime64.so"
link_lib "${ROCMLIB}/libMIOpen.so.1" "${TORCHLIB}/libMIOpen.so"
link_lib "${ROCMLIB}/librccl.so.1" "${TORCHLIB}/librccl.so"
link_lib "${ROCMLIB}/librocblas.so.4" "${TORCHLIB}/librocblas.so"
link_lib "${ROCMLIB}/librocfft.so.0" "${TORCHLIB}/librocfft.so"
link_lib "${ROCMLIB}/librocm_smi64.so.6" "${TORCHLIB}/librocm_smi64.so"
link_lib "${ROCMLIB}/librocrand.so.1" "${TORCHLIB}/librocrand.so"
link_lib "${ROCMLIB}/librocsolver.so.0" "${TORCHLIB}/librocsolver.so"
link_lib "${ROCMLIB}/librocsparse.so.1" "${TORCHLIB}/librocsparse.so"
link_lib "${ROCMLIB}/libroctracer64.so.4" "${TORCHLIB}/libroctracer64.so"
link_lib "${ROCMLIB}/libroctx64.so.4" "${TORCHLIB}/libroctx64.so"

# Directories shipped next to the libraries are replaced wholesale.
link_dir "${ROCMLIB}/rocblas" "${TORCHLIB}/rocblas"
link_dir "${ROCMLIB}/hipblaslt" "${TORCHLIB}/hipblaslt"
5 changes: 5 additions & 0 deletions runtimes/rocm-pytorch/ubi9-python-3.9/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,15 @@ WORKDIR /opt/app-root/bin
COPY Pipfile.lock ./
# Copy Elyra dependencies for air-gapped enviroment
COPY utils ./utils/
# Copy utility script
COPY de-vendor-torch.sh ./

# Install the Python dependencies with micropipenv, de-vendor PyTorch's bundled
# ROCm libraries, then adjust permissions. Everything runs in a single RUN so
# the lock file and the helper script are deleted in the same step that uses them.
RUN echo "Installing softwares and packages" && \
micropipenv install && \
rm -f ./Pipfile.lock && \
# De-vendor the ROCm libs that are embedded in PyTorch; the helper script is removed once it has run \
./de-vendor-torch.sh && \
rm ./de-vendor-torch.sh && \
# Fix permissions to support pip in OpenShift environments (group-writable site-packages) \
chmod -R g+w /opt/app-root/lib/python3.9/site-packages && \
fix-permissions /opt/app-root -P
Expand Down
41 changes: 41 additions & 0 deletions runtimes/rocm-pytorch/ubi9-python-3.9/de-vendor-torch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh
set -ex
# Replace PyTorch's vendored shared libraries with system libraries.
# The script assumes that PyTorch is built with the same ROCm ABI as the
# system installation of ROCm.
#
# Every link target is verified before it is linked: `ln -sf` succeeds even
# when the target does not exist, which would silently swap a working
# vendored library for a dangling symlink. Failing here surfaces a ROCm
# soname/version mismatch at image build time.
#
# NOTE: tests/test_main.py asserts that the jupyter/ and runtimes/ copies of
# this script are identical; apply any change to both files.

# Source: https://github.com/tiran/instructlab-containers/blob/main/containers/rocm/de-vendor-torch.sh

PYTHON=python3.9
ROCMLIB=/opt/rocm/lib
TORCHLIB=/opt/app-root/lib/${PYTHON}/site-packages/torch/lib

# require_target PATH
# Abort the script (and therefore the image build) if PATH does not exist.
require_target() {
    if [ ! -e "$1" ]; then
        echo "de-vendor-torch.sh: system library not found: $1" >&2
        exit 1
    fi
}

# link_lib TARGET LINK_NAME
# Replace the file at LINK_NAME with a symlink to the existing TARGET.
link_lib() {
    require_target "$1"
    ln -sf "$1" "$2"
}

# link_dir TARGET LINK_NAME
# Replace the directory tree at LINK_NAME with a symlink to the existing
# TARGET. The target is checked first so nothing is deleted when it is missing.
link_dir() {
    require_target "$1"
    rm -rf "$2"
    ln -sf "$1" "$2"
}

# libdrm comes from the base OS rather than from the ROCm tree.
link_lib /usr/lib64/libdrm.so.2 "${TORCHLIB}/libdrm.so"
link_lib /usr/lib64/libdrm_amdgpu.so.1 "${TORCHLIB}/libdrm_amdgpu.so"

# ROCm shared libraries; the versioned sonames must match the installed ROCm.
link_lib "${ROCMLIB}/libamd_comgr.so.2" "${TORCHLIB}/libamd_comgr.so"
link_lib "${ROCMLIB}/libamdhip64.so.6" "${TORCHLIB}/libamdhip64.so"
link_lib "${ROCMLIB}/libhipblaslt.so.0" "${TORCHLIB}/libhipblaslt.so"
link_lib "${ROCMLIB}/libhipblas.so.2" "${TORCHLIB}/libhipblas.so"
link_lib "${ROCMLIB}/libhipfft.so.0" "${TORCHLIB}/libhipfft.so"
link_lib "${ROCMLIB}/libhiprand.so.1" "${TORCHLIB}/libhiprand.so"
link_lib "${ROCMLIB}/libhiprtc.so.6" "${TORCHLIB}/libhiprtc.so"
link_lib "${ROCMLIB}/libhipsolver.so.0" "${TORCHLIB}/libhipsolver.so"
link_lib "${ROCMLIB}/libhipsparse.so.1" "${TORCHLIB}/libhipsparse.so"
link_lib "${ROCMLIB}/libhsa-runtime64.so.1" "${TORCHLIB}/libhsa-runtime64.so"
link_lib "${ROCMLIB}/libMIOpen.so.1" "${TORCHLIB}/libMIOpen.so"
link_lib "${ROCMLIB}/librccl.so.1" "${TORCHLIB}/librccl.so"
link_lib "${ROCMLIB}/librocblas.so.4" "${TORCHLIB}/librocblas.so"
link_lib "${ROCMLIB}/librocfft.so.0" "${TORCHLIB}/librocfft.so"
link_lib "${ROCMLIB}/librocm_smi64.so.6" "${TORCHLIB}/librocm_smi64.so"
link_lib "${ROCMLIB}/librocrand.so.1" "${TORCHLIB}/librocrand.so"
link_lib "${ROCMLIB}/librocsolver.so.0" "${TORCHLIB}/librocsolver.so"
link_lib "${ROCMLIB}/librocsparse.so.1" "${TORCHLIB}/librocsparse.so"
link_lib "${ROCMLIB}/libroctracer64.so.4" "${TORCHLIB}/libroctracer64.so"
link_lib "${ROCMLIB}/libroctx64.so.4" "${TORCHLIB}/libroctx64.so"

# Directories shipped next to the libraries are replaced wholesale.
link_dir "${ROCMLIB}/rocblas" "${TORCHLIB}/rocblas"
link_dir "${ROCMLIB}/hipblaslt" "${TORCHLIB}/hipblaslt"
12 changes: 12 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,15 @@ def test_image_pipfiles(subtests: pytest_subtests.plugin.SubTests):
pipfile = tomllib.load(fp)
assert "requires" in pipfile, "Pipfile is missing a [[requires]] section"
assert pipfile["requires"]["python_version"] == python, "Pipfile does not declare the expected Python version"


def test_files_that_should_be_same_are_same(subtests: pytest_subtests.plugin.SubTests):
    """Check that files duplicated across image directories have not drifted apart.

    Each entry maps a human-readable label to a list of paths whose text must
    be identical. The first path in a list is the reference; every other path
    is compared against it inside one subtest per label.
    """
    must_match = {
        "ROCm de-vendor script": [
            PROJECT_ROOT / "jupyter/rocm/pytorch/ubi9-python-3.9/de-vendor-torch.sh",
            PROJECT_ROOT / "runtimes/rocm-pytorch/ubi9-python-3.9/de-vendor-torch.sh",
        ],
    }
    for label, paths in must_match.items():
        # Split off the reference file; the remainder are the copies to verify.
        reference, *copies = paths
        with subtests.test(msg=f"Checking {label}"):
            for candidate in copies:
                assert reference.read_text() == candidate.read_text(), \
                    f"The files {reference} and {candidate} do not match"

0 comments on commit ec45de1

Please sign in to comment.