Skip to content

Commit

Permalink
Merge pull request #651 from allenai/shanea/lumi-torch2.3-2
Browse files: browse the repository at this point in the history
Update LUMI Dockerfile
  • Loading branch information
2015aroras authored Jul 11, 2024
2 parents a101b31 + e0efc20 commit b10ab4b
Showing 1 changed file with 22 additions and 15 deletions.
37 changes: 22 additions & 15 deletions docker/Dockerfile.lumi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Base image: Ubuntu "jammy". The release is pinned by codename rather than
# the floating `latest` tag because the ROCm/amdgpu apt sources added later in
# this file are declared for the "jammy" suite.
FROM ubuntu:jammy

# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive
# Use the C.UTF-8 locale for everything run in the image.
ENV LC_ALL=C.UTF-8
Expand All @@ -21,9 +21,9 @@ RUN apt-get install -y \
vim \
fish \
wget \
unzip \
parallel \
s3cmd \
awscli \
htop \
wget \
fish \
Expand All @@ -38,17 +38,22 @@ RUN apt-get install -y \
gdb \
apt-utils

# Install AWS CLI v2 from the official zip bundle.
# Download, install and cleanup happen in a single layer so that neither the
# archive nor the unpacked installer directory is persisted in the image
# (files deleted in a later layer still occupy space in the earlier one).
# `curl -f` makes an HTTP error fail the build instead of saving an error page.
RUN curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o awscliv2.zip && \
    unzip -q awscliv2.zip && \
    ./aws/install && \
    rm -rf ./aws awscliv2.zip

# Install Google tools
# The repository signing key is converted with `gpg --dearmor` into the keyring
# referenced by `signed-by` (apt-key is deprecated). The key is downloaded to a
# file first so a failed download fails the step instead of being hidden by a
# pipe. `apt-get update` and `apt-get install` share one layer so the install
# never runs against a stale cached package index, and `-y` keeps the install
# from stopping at a confirmation prompt.
RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg -o /tmp/cloud.google.gpg.asc && \
    gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg /tmp/cloud.google.gpg.asc && \
    rm -f /tmp/cloud.google.gpg.asc && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update && \
    apt-get install -y google-cloud-cli

# Install ROCm
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/5.6/ubuntu jammy main" >> /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/5.6 jammy main" >> /etc/apt/sources.list.d/rocm.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.0.2/ubuntu jammy main" >> /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.0.2 jammy main" >> /etc/apt/sources.list.d/rocm.list && \
echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
apt-get update && \
apt-get install -y rccl rccl-dev rocm-libs rocm-gdb rocm-dev rocm-developer-tools rocm-hip-runtime-dev rocm-utils rocm-hip-sdk && \
Expand Down Expand Up @@ -83,7 +88,7 @@ ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH

# Install torch
# Wheels are taken from the PyTorch ROCm 6.0 index, matching the ROCm 6.0.2
# apt repositories configured in the ROCm section of this file. Only one torch
# build is installed: installing an older build first would just add a large
# layer that is immediately replaced. --no-cache-dir keeps downloaded wheels
# out of the image layers.
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/rocm6.0

# Install DeepSpeed
RUN pip install --no-cache-dir mpi4py
Expand All @@ -95,22 +100,24 @@ RUN cd /opt && \

# Install more dependencies
# Only pyproject.toml is copied into the image, so a stub `olmo` package is
# created to make the project installable. pip then installs the project with
# its `train` extra (pulling in the dependencies), after which the project
# itself (`ai2-olmo`) is uninstalled and the stub removed, leaving only the
# dependencies behind.
# NOTE(review): olmo/version.py is presumably read by pyproject.toml to resolve
# the package version at build time - confirm against pyproject.toml.
COPY pyproject.toml .
RUN mkdir olmo && \
    touch olmo/__init__.py && \
    echo 'VERSION = "0.1.0"' > olmo/version.py && \
    pip install --no-cache-dir '.[train]' && \
    pip uninstall -y ai2-olmo && \
    rm -rf olmo/

# Additional Python tools: py-spy, plus wandb upgraded to the newest release
# available at build time. Neither install keeps pip's download cache.
RUN pip install --no-cache-dir py-spy && \
    pip install --no-cache-dir --upgrade wandb

# # Install flash attention (for MI200 series!)
# RUN cd /opt && \
# git clone --recursive https://github.com/ROCm/flash-attention.git && \
# cd flash-attention && \
# GPU_ARCHS="gfx90a" pip install .

# Cleanup
# Removes no-longer-needed apt packages, the source/build trees under /opt,
# apt's package cache and pip's cache.
# `-y` is required on autoremove: without it apt asks for confirmation and
# aborts the non-interactive build whenever there is anything to remove.
# Note: this only reduces the final filesystem contents; data written by
# earlier layers still counts towards the image size.
RUN apt-get autoremove -y && \
    rm -rf /opt/mpich-3.1.4 /opt/aws-ofi-rccl /opt/DeepSpeed && \
    apt-get clean && \
    pip cache purge

# Install flash attention (for MI200 series!)
# Clones the ROCm flash-attention repository (with submodules) into
# /opt/flash-attention and pip-installs it from that checkout, with GPU_ARCHS
# set to "gfx90a" for the build.
# NOTE(review): the clone is not pinned to a tag or commit, so the result
# depends on the repository state at build time.
# NOTE(review): the same steps also appear commented out earlier in this file;
# confirm which variant is intended before relying on this step.
RUN cd /opt && \
git clone --recursive https://github.com/ROCm/flash-attention.git && \
cd flash-attention && \
GPU_ARCHS="gfx90a" pip install .
# Drop anything the install above left in pip's cache.
RUN pip cache purge

0 comments on commit b10ab4b

Please sign in to comment.