Dockerfile.ci

# syntax=docker/dockerfile:1-labs

# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3

FROM ${BASE_IMAGE}

ENV TRANSFORMERS_OFFLINE=0 
ENV HYDRA_FULL_ERROR=1
ENV PYTHONUNBUFFERED=1

# APT packages
RUN <<"EOF" bash -ex
apt-get update
apt-get install -y bc libsox-fmt-all -y 
apt-get clean
EOF

WORKDIR /workspace

# Install NeMo requirements
ARG TE_TAG=bfe21c3d68b0a9951e5716fb520045db53419c5e
ARG MODELOPT_VERSION=0.11.0
ARG MCORE_TAG=02871b4df8c69fac687ab6676c4246e936ce92d0
ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
--mount=type=bind,source=requirements,target=requirements \
--mount=type=bind,source=tools,target=tools \
--mount=type=bind,source=setup.py,target=setup.py \
--mount=type=bind,source=nemo/package_info.py,target=nemo/package_info.py \
--mount=type=bind,source=nemo/__init__.py,target=nemo/__init__.py <<"EOF" bash -ex
pip install --no-cache-dir --no-build-isolation --extra-index-url https://pypi.nvidia.com \
"transformer-engine @ git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}" \
"megatron_core @ git+https://github.com/NVIDIA/Megatron-LM.git@${MCORE_TAG}" \
"nvidia-modelopt[torch]~=${MODELOPT_VERSION}" \
"apex @ git+https://github.com/NVIDIA/apex.git@${APEX_TAG}" \
"llama-index==0.10.43" \
"onnxscript @ git+https://github.com/microsoft/onnxscript" \
-r tools/ctc_segmentation/requirements.txt \
".[all]"

# Megatron Core installation
git clone https://github.com/NVIDIA/Megatron-LM.git && \
pushd Megatron-LM && \
git checkout ${MCORE_TAG} && \
  pushd megatron/core/datasets && \
  make && \
  popd && \
popd
export PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
EOF

# Copy over NeMo code
COPY ./ ./
RUN <<"EOF" bash -ex
pip install --no-cache-dir --no-build-isolation ".[all]"

# set permission
chmod 777 -R /workspace
EOF

ENV PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"